From 35707c5d69650b338bbc6ced081f716ac944208a Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Tue, 5 Aug 2025 16:38:01 +0000 Subject: [PATCH 01/24] [API-Compat] paddle.compat.split is added and tested --- python/paddle/__init__.py | 1 + python/paddle/tensor/compat.py | 8 ++++++++ python/paddle/tensor/manipulation.py | 30 ++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index df3f0f2509d16c..c969cdfc276e54 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -200,6 +200,7 @@ pdist, ) from .nn.initializer.lazy_init import LazyGuard +from .tensor import compat as compat from .tensor.attribute import ( imag, is_complex, diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index ad7ec15d1cfae0..837a31f6ba2a84 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -18,10 +18,18 @@ import paddle from paddle import _C_ops +from paddle.tensor import fill_constant +from ..base.data_feeder import ( + check_dtype, + check_type, + check_variable_and_dtype, +) from ..base.framework import Variable from ..framework import ( + LayerHelper, in_dynamic_mode, + in_pir_mode, ) if TYPE_CHECKING: diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 403f48d17c2334..4ecae74c7d3d17 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -69,6 +69,36 @@ __all__ = [] +def forbid_keywords(illegal_keys, correct_func_name): + """ + A decorator that hints users to use the correct `compat` functions, when erroneous keyword arguments are detected + + illegal_keys: list[str] | str - Forbidden keyword names + correct_func_name: str - Recommended function name + """ + if isinstance(illegal_keys, str): + illegal_keys = [illegal_keys] + + def decorator(func): + def wrapper(*args, **kwargs): + found_keys = [key for key in illegal_keys if key in kwargs] + + if found_keys: + keys_str = ", ".join(f"'{key}'" for key in found_keys) + plural = "s" if len(found_keys) > 1 else "" + + raise TypeError( + f"{func.__name__}() received unexpected keyword argument{plural} {keys_str}. " + f"\nDid you mean to use {correct_func_name}() instead?" 
+ ) + + return func(*args, **kwargs) + + return wrapper + + return decorator + + def tensor_array_to_tensor( input: Tensor | list[Tensor], axis: int = 1, From 23c422df2d8b8a9b8d82241ff056a0719d2096a7 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Wed, 6 Aug 2025 09:33:46 +0000 Subject: [PATCH 02/24] [API-Compat] paddle.compat.split is rigorously tested --- python/paddle/__init__.py | 1 - python/paddle/tensor/compat.py | 8 ---- python/paddle/tensor/manipulation.py | 10 +---- python/paddle/utils/__init__.py | 1 + python/paddle/utils/compat_kwarg_check.py | 52 +++++++++++++++++++++++ 5 files changed, 54 insertions(+), 18 deletions(-) create mode 100644 python/paddle/utils/compat_kwarg_check.py diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index c969cdfc276e54..df3f0f2509d16c 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -200,7 +200,6 @@ pdist, ) from .nn.initializer.lazy_init import LazyGuard -from .tensor import compat as compat from .tensor.attribute import ( imag, is_complex, diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 837a31f6ba2a84..ad7ec15d1cfae0 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -18,18 +18,10 @@ import paddle from paddle import _C_ops -from paddle.tensor import fill_constant -from ..base.data_feeder import ( - check_dtype, - check_type, - check_variable_and_dtype, -) from ..base.framework import Variable from ..framework import ( - LayerHelper, in_dynamic_mode, - in_pir_mode, ) if TYPE_CHECKING: diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 4ecae74c7d3d17..8036f328697885 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -87,16 +87,8 @@ def wrapper(*args, **kwargs): keys_str = ", ".join(f"'{key}'" for key in found_keys) plural = "s" if len(found_keys) > 1 else "" - raise TypeError( - f"{func.__name__}() received unexpected keyword argument{plural} {keys_str}. " - f"\nDid you mean to use {correct_func_name}() instead?" - ) - - return func(*args, **kwargs) - return wrapper - - return decorator +__all__ = [] def tensor_array_to_tensor( diff --git a/python/paddle/utils/__init__.py b/python/paddle/utils/__init__.py index 3fbcf6af86b4df..25b2301d064c8f 100644 --- a/python/paddle/utils/__init__.py +++ b/python/paddle/utils/__init__.py @@ -14,6 +14,7 @@ from ..base.framework import require_version from . import ( # noqa: F401 + compat_kwarg_check, cpp_extension, decorator_utils, dlpack, diff --git a/python/paddle/utils/compat_kwarg_check.py b/python/paddle/utils/compat_kwarg_check.py new file mode 100644 index 00000000000000..0e8c6729e89bd9 --- /dev/null +++ b/python/paddle/utils/compat_kwarg_check.py @@ -0,0 +1,52 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
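A minimal usage sketch of the forbid_keywords helper added here, assuming it decorates a compat-style API so that Paddle-style keyword names point users back to the original function (the decorated function below is illustrative, mirroring how later patches in this series use the helper):

    from paddle.utils.compat_kwarg_check import forbid_keywords

    @forbid_keywords(['x', 'axis'], 'paddle.min')
    def compat_min(input, dim=None, keepdim=False):
        ...  # placeholder body; the real API lives in python/paddle/tensor/compat.py

    # compat_min(t, x=t) then raises:
    # TypeError: compat_min() received unexpected keyword argument 'x'.
    # Did you mean to use paddle.min() instead?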
+ +from __future__ import annotations + +from typing import Any, Callable, TypeVar + +F = TypeVar('F', bound=Callable[..., Any]) + + +def forbid_keywords( + illegal_keys: list[str] | str, correct_func_name: str +) -> Callable[[F], F]: + """ + A decorator that hints users to use the correct `compat` functions, when erroneous keyword arguments are detected + + Args: + illegal_keys: list[str] | str - Forbidden keyword names + correct_func_name: str - Recommended function name + """ + if isinstance(illegal_keys, str): + illegal_keys = [illegal_keys] + + def decorator(func: F) -> F: + def wrapper(*args: Any, **kwargs: Any) -> Any: + found_keys = [key for key in illegal_keys if key in kwargs] + + if found_keys: + keys_str = ", ".join(f"'{key}'" for key in found_keys) + plural = "s" if len(found_keys) > 1 else "" + + raise TypeError( + f"{func.__name__}() received unexpected keyword argument{plural} {keys_str}. " + f"\nDid you mean to use {correct_func_name}() instead?" + ) + + return func(*args, **kwargs) + + return wrapper + + return decorator From 309b44ad30ac3549670667aeb60d66fa9eb01c22 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Wed, 6 Aug 2025 12:23:16 +0000 Subject: [PATCH 03/24] [API-Compat] Make the forbid_keywords decorator transparent --- python/paddle/utils/compat_kwarg_check.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/python/paddle/utils/compat_kwarg_check.py b/python/paddle/utils/compat_kwarg_check.py index 0e8c6729e89bd9..1da6a1584598c1 100644 --- a/python/paddle/utils/compat_kwarg_check.py +++ b/python/paddle/utils/compat_kwarg_check.py @@ -14,7 +14,9 @@ from __future__ import annotations -from typing import Any, Callable, TypeVar +import functools +import inspect +from typing import Any, Callable, TypeVar, cast F = TypeVar('F', bound=Callable[..., Any]) @@ -29,10 +31,12 @@ def forbid_keywords( illegal_keys: list[str] | str - Forbidden keyword names correct_func_name: str - Recommended function name """ - if isinstance(illegal_keys, str): - illegal_keys = [illegal_keys] + keys = [illegal_keys] if isinstance(illegal_keys, str) else illegal_keys def decorator(func: F) -> F: + orig_sig = inspect.signature(func) + + @functools.wraps(func) def wrapper(*args: Any, **kwargs: Any) -> Any: found_keys = [key for key in illegal_keys if key in kwargs] @@ -47,6 +51,14 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: return func(*args, **kwargs) - return wrapper + # Important: function signatures / specs should be copied to avoid erroneous input/output extraction (particularly in static graph, like test_split_op.py) + wrapper.__signature__ = orig_sig + if hasattr(func, "__defaults__"): + wrapper.__defaults__ = func.__defaults__ + if hasattr(func, "__kwdefaults__"): + wrapper.__kwdefaults__ = func.__kwdefaults__ + wrapper.__wrapped__ = func + + return cast('F', wrapper) return decorator From 2a33744d1d99469dab203ca9d6ba2caf1f4184e7 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Wed, 6 Aug 2025 12:43:42 +0000 Subject: [PATCH 04/24] [API-Compat] Fixed decorator str input --- python/paddle/utils/compat_kwarg_check.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/paddle/utils/compat_kwarg_check.py b/python/paddle/utils/compat_kwarg_check.py index 1da6a1584598c1..37384a49ef7942 100644 --- a/python/paddle/utils/compat_kwarg_check.py +++ b/python/paddle/utils/compat_kwarg_check.py @@ -31,7 +31,9 @@ def forbid_keywords( illegal_keys: list[str] | str - Forbidden keyword names correct_func_name: str - Recommended function 
name """ - keys = [illegal_keys] if isinstance(illegal_keys, str) else illegal_keys + illegal_keys = ( + [illegal_keys] if isinstance(illegal_keys, str) else illegal_keys + ) def decorator(func: F) -> F: orig_sig = inspect.signature(func) From 11d9640ac26ce15360dabac0fca5d7871ca8f982 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Sat, 9 Aug 2025 06:03:28 +0000 Subject: [PATCH 05/24] [API-Compat] More unittest & static graph check & updated decorator --- python/paddle/tensor/manipulation.py | 22 -------- python/paddle/utils/__init__.py | 1 - python/paddle/utils/compat_kwarg_check.py | 66 ----------------------- 3 files changed, 89 deletions(-) delete mode 100644 python/paddle/utils/compat_kwarg_check.py diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 8036f328697885..403f48d17c2334 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -69,28 +69,6 @@ __all__ = [] -def forbid_keywords(illegal_keys, correct_func_name): - """ - A decorator that hints users to use the correct `compat` functions, when erroneous keyword arguments are detected - - illegal_keys: list[str] | str - Forbidden keyword names - correct_func_name: str - Recommended function name - """ - if isinstance(illegal_keys, str): - illegal_keys = [illegal_keys] - - def decorator(func): - def wrapper(*args, **kwargs): - found_keys = [key for key in illegal_keys if key in kwargs] - - if found_keys: - keys_str = ", ".join(f"'{key}'" for key in found_keys) - plural = "s" if len(found_keys) > 1 else "" - - -__all__ = [] - - def tensor_array_to_tensor( input: Tensor | list[Tensor], axis: int = 1, diff --git a/python/paddle/utils/__init__.py b/python/paddle/utils/__init__.py index 25b2301d064c8f..3fbcf6af86b4df 100644 --- a/python/paddle/utils/__init__.py +++ b/python/paddle/utils/__init__.py @@ -14,7 +14,6 @@ from ..base.framework import require_version from . import ( # noqa: F401 - compat_kwarg_check, cpp_extension, decorator_utils, dlpack, diff --git a/python/paddle/utils/compat_kwarg_check.py b/python/paddle/utils/compat_kwarg_check.py deleted file mode 100644 index 37384a49ef7942..00000000000000 --- a/python/paddle/utils/compat_kwarg_check.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
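This commit drops the standalone helper module; the remaining patches in the series rely on the equivalent ForbidKeywordsDecorator from paddle.utils.decorator_utils instead, applied roughly as follows (a sketch based on the usage visible in patches 07 and 09):

    from paddle.utils.decorator_utils import ForbidKeywordsDecorator

    @ForbidKeywordsDecorator(
        illegal_keys=['x', 'axis'],
        func_name='paddle.compat.min',
        correct_name='paddle.min',
    )
    def min(input, *args, **kwargs):
        ...  # compat implementation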
- -from __future__ import annotations - -import functools -import inspect -from typing import Any, Callable, TypeVar, cast - -F = TypeVar('F', bound=Callable[..., Any]) - - -def forbid_keywords( - illegal_keys: list[str] | str, correct_func_name: str -) -> Callable[[F], F]: - """ - A decorator that hints users to use the correct `compat` functions, when erroneous keyword arguments are detected - - Args: - illegal_keys: list[str] | str - Forbidden keyword names - correct_func_name: str - Recommended function name - """ - illegal_keys = ( - [illegal_keys] if isinstance(illegal_keys, str) else illegal_keys - ) - - def decorator(func: F) -> F: - orig_sig = inspect.signature(func) - - @functools.wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> Any: - found_keys = [key for key in illegal_keys if key in kwargs] - - if found_keys: - keys_str = ", ".join(f"'{key}'" for key in found_keys) - plural = "s" if len(found_keys) > 1 else "" - - raise TypeError( - f"{func.__name__}() received unexpected keyword argument{plural} {keys_str}. " - f"\nDid you mean to use {correct_func_name}() instead?" - ) - - return func(*args, **kwargs) - - # Important: function signatures / specs should be copied to avoid erroneous input/output extraction (particularly in static graph, like test_split_op.py) - wrapper.__signature__ = orig_sig - if hasattr(func, "__defaults__"): - wrapper.__defaults__ = func.__defaults__ - if hasattr(func, "__kwdefaults__"): - wrapper.__kwdefaults__ = func.__kwdefaults__ - wrapper.__wrapped__ = func - - return cast('F', wrapper) - - return decorator From 6a584709602492e1681ab8762bff9d63fff31b2c Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Fri, 8 Aug 2025 10:33:51 +0000 Subject: [PATCH 06/24] [API-Compat] Add paddle.compat.min/max and new PHI kernel (min/max_with_index) --- .../infer_symbolic_shape/unary_infer_sym.cc | 23 +- .../infer_symbolic_shape/unary_infer_sym.h | 2 + paddle/phi/infermeta/unary.cc | 84 +++++ paddle/phi/infermeta/unary.h | 8 + .../kernels/gpu/min_max_with_index_kernel.cu | 312 ++++++++++++++++++ paddle/phi/kernels/gpu/reduce_kernel.cu | 52 +++ .../min_max_with_index_grad_kernel.h.h | 42 +++ .../phi/kernels/min_max_with_index_kernel.h | 40 +++ paddle/phi/ops/yaml/backward.yaml | 20 ++ paddle/phi/ops/yaml/ops.yaml | 22 ++ python/paddle/compat.py | 4 +- python/paddle/tensor/compat.py | 151 ++++++++- 12 files changed, 752 insertions(+), 8 deletions(-) create mode 100644 paddle/phi/kernels/gpu/min_max_with_index_kernel.cu create mode 100644 paddle/phi/kernels/min_max_with_index_grad_kernel.h.h create mode 100644 paddle/phi/kernels/min_max_with_index_kernel.h diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc index 6750759633d0b8..16ee03501fa4ab 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc @@ -315,8 +315,9 @@ bool AnyOpInferSymbolicShape(pir::Operation *op, axis.size() == 0 /*reduce_all*/); } -bool ArgmaxOpInferSymbolicShape(pir::Operation *op, - pir::InferSymbolicShapeContext *infer_context) { +bool MinMaxOpInferSymbolicShape(pir::Operation *op, + pir::InferSymbolicShapeContext *infer_context, + bool output_val_and_ind = false) { bool flatten = GetBoolAttr(op, "flatten"); bool keepdims = GetBoolAttr(op, "keepdims"); @@ -357,13 +358,23 @@ bool ArgmaxOpInferSymbolicShape(pir::Operation *op, 
symbol::TensorShapeOrDataDimExprs(out_sym_shape)}; infer_context->SetShapeOrDataForValue(op->result(0), shape_data); + if (output_val_and_ind) + infer_context->SetShapeOrDataForValue(op->result(1), shape_data); return true; } -bool ArgminOpInferSymbolicShape(pir::Operation *op, - pir::InferSymbolicShapeContext *infer_context) { - return ArgmaxOpInferSymbolicShape(op, infer_context); -} +#define DEFINE_MINMAX_OP_INFER_FUNC(OpName, output_val_and_ind) \ + bool OpName##OpInferSymbolicShape( \ + pir::Operation *op, pir::InferSymbolicShapeContext *infer_context) { \ + return MinMaxOpInferSymbolicShape(op, infer_context, output_val_and_ind); \ + } + +DEFINE_MINMAX_OP_INFER_FUNC(Argmin, false) +DEFINE_MINMAX_OP_INFER_FUNC(Argmax, false) +DEFINE_MINMAX_OP_INFER_FUNC(MinWithIndex, true) +DEFINE_MINMAX_OP_INFER_FUNC(MaxWithIndex, true) + +#undef DEFINE_MINMAX_OP_INFER_FUNC bool AsComplexOpInferSymbolicShape( pir::Operation *op, pir::InferSymbolicShapeContext *infer_context) { diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.h b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.h index 9868d08d8a290d..8d21b51eb2719f 100755 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.h +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.h @@ -93,8 +93,10 @@ OP_DECLARE_INFER_SYMBOLIC_SHAPE(Lu) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Lu_) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Mode) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Max) +OP_DECLARE_INFER_SYMBOLIC_SHAPE(MaxWithIndex) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Maxout) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Min) +OP_DECLARE_INFER_SYMBOLIC_SHAPE(MinWithIndex) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Mean) OP_DECLARE_INFER_SYMBOLIC_SHAPE(MeanAll) OP_DECLARE_INFER_SYMBOLIC_SHAPE(MatrixPower) diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 405528589b824e..fe014446c88ce2 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -366,6 +366,90 @@ void ArgMinMaxInferMeta(const MetaTensor& x, } } +void MinMaxWithIndexInferMeta(const MetaTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + MetaTensor* val_out, + MetaTensor* ind_out, + MetaConfig config) { + DataType val_dtype = x.dtype(); + + if (!config.is_runtime && axis.FromTensor()) { + std::vector vec; + if (flatten) { + if (keepdims) { // NOLINT + vec = std::vector(x.dims().size(), -1); + } else { + vec = {}; + } + } else { + if (keepdims) { + vec = std::vector(x.dims().size(), -1); + } else { + vec = std::vector(x.dims().size() - 1, -1); + } + } + val_out->set_dims(common::make_ddim(vec)); + val_out->set_dtype(val_dtype); + ind_out->set_dims(common::make_ddim(vec)); + ind_out->set_dtype(DataType::INT64); + return; + } + auto int_axis = axis.to(); + const auto& x_dims = x.dims(); + + auto x_rank = x.dims().size(); + if (x_rank > 0) { + PADDLE_ENFORCE_GE(int_axis, + -x_rank, + common::errors::InvalidArgument( + "'axis'(%d) must be greater than or equal to" + " -Rank(X)(%d).", + int_axis, + -x_rank)); + PADDLE_ENFORCE_LT( + int_axis, + x_rank, + common::errors::InvalidArgument( + "'axis'(%d) must be less than Rank(X)(%d) of Input(X).", + int_axis, + x_rank)); + } else { + // 0-dim tensor + PADDLE_ENFORCE_EQ(int_axis == 0 || int_axis == -1, + true, + common::errors::InvalidArgument( + "'axis'(%d) must be 0 or -1 if input tensor is " + "0-dim.", + int_axis)); + } + + if (int_axis < 0) int_axis += x_rank; + + std::vector vec; + if (flatten) { + if 
(keepdims) { // NOLINT + vec = std::vector(x.dims().size(), 1); + } else { + vec = {}; + } + } else { + for (int64_t i = 0; i < int_axis; i++) + vec.emplace_back(x_dims[static_cast(i)]); + if (keepdims) { + vec.emplace_back(static_cast(1)); + } + for (int64_t i = int_axis + 1; i < x_rank; i++) + vec.emplace_back(x_dims[static_cast(i)]); + } + + val_out->set_dims(common::make_ddim(vec)); + val_out->set_dtype(val_dtype); + ind_out->set_dims(common::make_ddim(vec)); + ind_out->set_dtype(DataType::INT64); +} + void ArgsortInferMeta(const MetaTensor& input, int axis, bool descending, diff --git a/paddle/phi/infermeta/unary.h b/paddle/phi/infermeta/unary.h index 7334ee476c0ad9..ea6c95748c16c5 100644 --- a/paddle/phi/infermeta/unary.h +++ b/paddle/phi/infermeta/unary.h @@ -66,6 +66,14 @@ void ArgMinMaxInferMeta(const MetaTensor& x, MetaTensor* out, MetaConfig config = MetaConfig()); +void MinMaxWithIndexInferMeta(const MetaTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + MetaTensor* val_out, + MetaTensor* ind_out, + MetaConfig config = MetaConfig()); + void ArgsortInferMeta(const MetaTensor& input, int axis, bool descending, diff --git a/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu b/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu new file mode 100644 index 00000000000000..57699f2f97e83e --- /dev/null +++ b/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu @@ -0,0 +1,312 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/min_max_with_index_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/kernel_registry.h" + +#if defined(__NVCC__) || defined(__HIPCC__) + +#ifdef __NVCC__ +#include "cub/cub.cuh" +#endif +#ifdef __HIPCC__ +#include +namespace cub = hipcub; +#endif +#include + +#include "paddle/common/ddim.h" +#include "paddle/phi/core/utils/data_type.h" +#include "paddle/phi/kernels/funcs/math_function.h" +namespace phi { + +namespace { // NOLINT +template +using KeyValuePair = cub::KeyValuePair; + +} // namespace + +#define FIXED_BLOCK_DIM_CASE_BASE(log2_block_dim, ...) \ + case (1 << (log2_block_dim)): { \ + constexpr auto kBlockDim = (1 << (log2_block_dim)); \ + __VA_ARGS__; \ + } break + +#define FIXED_BLOCK_DIM_CASE(...) 
\ + FIXED_BLOCK_DIM_CASE_BASE(10, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(9, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(8, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(7, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(6, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(5, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(4, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(3, ##__VA_ARGS__); + +template +__global__ void MinMaxWithIndexKernel(const int64_t height, // n * h + const int64_t width, // c + const int64_t post_size, // h + const Reducer reducer, + const T init, + const T* in, + T* val_out, + IndType* key_out) { + typedef cub::BlockReduce, BlockDim> BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + + for (IndexType idx = blockIdx.x; idx < height; idx += gridDim.x) { + KeyValuePair kv_pair = {-1, init}; + IndexType h = idx / post_size; + IndexType w = idx % post_size; + for (IndexType k = threadIdx.x; k < width; k += blockDim.x) { + kv_pair = + reducer({k, in[h * width * post_size + k * post_size + w]}, kv_pair); + } + kv_pair = BlockReduce(temp_storage).Reduce(kv_pair, reducer); + if (threadIdx.x == 0) { + val_out[idx] = static_cast(kv_pair.value); + key_out[idx] = static_cast(kv_pair.key); + } + __syncthreads(); + } +} + +template +void ComputeMinMaxWithIndex(const phi::GPUContext& dev_ctx, + const DenseTensor& input, + DenseTensor* values, + DenseTensor* indices, + const int64_t pre, + const int64_t post, + const int64_t n) { + auto cu_stream = dev_ctx.stream(); + auto ComputeBlockSize = [](int64_t col) { + auto block_size = 8; + if (col > 512) + block_size = 1024; + else if (col > 256) + block_size = 512; + else if (col > 128) + block_size = 256; + else if (col > 64) + block_size = 128; + else if (col > 32) + block_size = 64; + else if (col > 16) + block_size = 32; + else if (col > 8) + block_size = 16; + return block_size; + }; + + int64_t max_grid_dimx = dev_ctx.GetCUDAMaxGridDimSize()[0]; + int64_t height = pre * post; + int64_t width = n; + int64_t grid_size = height < max_grid_dimx ? 
height : max_grid_dimx; + + const T* in_data = input.data(); + + T* val_data = dev_ctx.template Alloc(values); + IndType* ind_data = dev_ctx.template Alloc(indices); + + if (typeid(Reducer) == typeid(cub::ArgMax)) { + switch (ComputeBlockSize(width)) { + FIXED_BLOCK_DIM_CASE( + MinMaxWithIndexKernel + <<>>( + height, + width, + post, + Reducer(), + std::numeric_limits::lowest(), + in_data, + val_data, + ind_data)); + } + } else { + switch (ComputeBlockSize(width)) { + FIXED_BLOCK_DIM_CASE( + MinMaxWithIndexKernel + <<>>( + height, + width, + post, + Reducer(), + std::numeric_limits::max(), + in_data, + val_data, + ind_data)); + } + } +} + +template +struct VisitDataCudaMinMaxWithIndexFunctor { + const Context& dev_ctx; + const DenseTensor& x; + int64_t axis; + bool keepdims; + bool flatten; + DenseTensor* val_out; + DenseTensor* ind_out; + + explicit VisitDataCudaMinMaxWithIndexFunctor(const Context& dev_ctx, + const DenseTensor& x, + int64_t axis, + bool keepdims, + bool flatten, + DenseTensor* val_out, + DenseTensor* ind_out) + : dev_ctx(dev_ctx), + x(x), + axis(axis), + keepdims(keepdims), + flatten(flatten), + val_out(val_out), + ind_out(ind_out) {} + + template + void apply() const { + phi::DDim x_dims; + int new_axis = axis; + if (flatten) { + x_dims = common::make_ddim({x.numel()}); + // if flatten, the axis just as 0 + new_axis = 0; + } else { + x_dims = x.dims(); + if (axis < 0) new_axis = axis + x.dims().size(); + } + if (x.numel() == 0) { + dev_ctx.template Alloc(val_out); + dev_ctx.template Alloc(ind_out); + return; + } + // For 0D Tensor + if (x.dims().size() == 0) { + dev_ctx.template Alloc(val_out); + dev_ctx.template Alloc(ind_out); + phi::funcs::set_constant(dev_ctx, ind_out, static_cast(0)); + phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, val_out); + return; + } + + int64_t numel = x.numel(); + int64_t groups = numel / x_dims[new_axis]; + int64_t pre = 1; + int64_t post = 1; + int64_t n = x_dims[new_axis]; + + for (int i = 0; i < new_axis; i++) { + pre *= x_dims[i]; + } + + for (int i = new_axis + 1; i < x_dims.size(); i++) { + post *= x_dims[i]; + } + + if (numel > std::numeric_limits::max()) { + ComputeMinMaxWithIndex( + dev_ctx, x, val_out, ind_out, pre, post, n); + } else { + ComputeMinMaxWithIndex( + dev_ctx, x, val_out, ind_out, pre, post, n); + } + } +}; + +template +void MinMaxWithIndexOpCUDAKernel(const Context& dev_ctx, + const DenseTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + DenseTensor* val_out, + DenseTensor* ind_out) { + PADDLE_ENFORCE_GE( + x.numel(), + 0, + common::errors::InvalidArgument( + "(min/max)_with_index input numel must > 0, bug got %d", x.numel())); + phi::VisitDataTypeTiny( + phi::DataType::INT64, + VisitDataCudaMinMaxWithIndexFunctor( + dev_ctx, x, axis.to(), keepdims, flatten, val_out, ind_out)); +} + +template +void MinWithIndexKernel(const Context& dev_ctx, + const DenseTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + DenseTensor* val_out, + DenseTensor* ind_out) { + MinMaxWithIndexOpCUDAKernel( + dev_ctx, x, axis, keepdims, flatten, val_out, ind_out); +} + +template +void MaxWithIndexKernel(const Context& dev_ctx, + const DenseTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + DenseTensor* val_out, + DenseTensor* ind_out) { + MinMaxWithIndexOpCUDAKernel( + dev_ctx, x, axis, keepdims, flatten, val_out, ind_out); +} + +#endif + +} // namespace phi + +PD_REGISTER_KERNEL(min_with_index, + GPU, + ALL_LAYOUT, + phi::MinWithIndexKernel, + phi::dtype::float16, + phi::dtype::bfloat16, + 
float, + double, + int32_t, + int64_t, + int16_t, + uint8_t) { + kernel->OutputAt(0).SetDataType(kernel->InputAt(0).dtype); + kernel->OutputAt(1).SetDataType(phi::DataType::INT64); +} + +PD_REGISTER_KERNEL(max_with_index, + GPU, + ALL_LAYOUT, + phi::MaxWithIndexKernel, + phi::dtype::float16, + phi::dtype::bfloat16, + float, + double, + int32_t, + int64_t, + int16_t, + uint8_t) { + kernel->OutputAt(0).SetDataType(kernel->InputAt(0).dtype); + kernel->OutputAt(1).SetDataType(phi::DataType::INT64); +} diff --git a/paddle/phi/kernels/gpu/reduce_kernel.cu b/paddle/phi/kernels/gpu/reduce_kernel.cu index 95132d09e2cc22..bc4de24540346a 100644 --- a/paddle/phi/kernels/gpu/reduce_kernel.cu +++ b/paddle/phi/kernels/gpu/reduce_kernel.cu @@ -159,6 +159,36 @@ void ReduceAMaxGradKernel(const Context& dev_ctx, dev_ctx, x, out, out_grad, dims, keep_dim, reduce_all, x_grad); } +template +void MinWithIndexGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& values, + const DenseTensor& values_grad, + const Scalar& dim, + bool keepdims, + bool flatten, + DenseTensor* x_grad) { + int64_t dim_val = dim.to(); + flatten = recompute_reduce_all(x, {dim_val}, flatten); + ReduceCudaAMaxAMinGrad( + dev_ctx, x, values, values_grad, {dim_val}, keepdims, flatten, x_grad); +} + +template +void MaxWithIndexGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& values, + const DenseTensor& values_grad, + const Scalar& dim, + bool keepdims, + bool flatten, + DenseTensor* x_grad) { + int64_t dim_val = dim.to(); + flatten = recompute_reduce_all(x, {dim_val}, flatten); + ReduceCudaAMaxAMinGrad( + dev_ctx, x, values, values_grad, {dim_val}, keepdims, flatten, x_grad); +} + template void ReduceMaxGradKernel(const Context& dev_ctx, const DenseTensor& x, @@ -284,6 +314,17 @@ PD_REGISTER_KERNEL(max_grad, phi::dtype::float16, phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(max_with_index_grad, + GPU, + ALL_LAYOUT, + phi::MaxWithIndexGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} + PD_REGISTER_KERNEL(mean_grad, GPU, ALL_LAYOUT, @@ -310,6 +351,17 @@ PD_REGISTER_KERNEL(min_grad, phi::dtype::float16, phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(min_with_index_grad, + GPU, + ALL_LAYOUT, + phi::MinWithIndexGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} + PD_REGISTER_KERNEL(sum_grad, GPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/min_max_with_index_grad_kernel.h.h b/paddle/phi/kernels/min_max_with_index_grad_kernel.h.h new file mode 100644 index 00000000000000..30c4fd34bb281e --- /dev/null +++ b/paddle/phi/kernels/min_max_with_index_grad_kernel.h.h @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
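The grad kernels registered above reuse ReduceCudaAMaxAMinGrad, i.e. the amin/amax backward rule that splits the incoming gradient evenly across all tied extrema. A rough NumPy model of that rule, for intuition only (not the actual kernel code):

    import numpy as np

    def amax_style_grad(x, out, out_grad, axis, keepdim):
        # Broadcast the reduced values/grads back to x's shape.
        if not keepdim:
            out = np.expand_dims(out, axis)
            out_grad = np.expand_dims(out_grad, axis)
        mask = (x == out).astype(x.dtype)  # 1 at every position that ties for the extremum
        return mask * out_grad / mask.sum(axis=axis, keepdims=True)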
+ +#pragma once + +#include "paddle/phi/common/scalar.h" +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void MaxWithIndexGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& values, + const DenseTensor& values_grad, + const Scalar& dim, + bool keepdims, + bool flatten, + DenseTensor* x_grad); + +template +void MinWithIndexGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& values, + const DenseTensor& values_grad, + const Scalar& dim, + bool keepdims, + bool flatten, + DenseTensor* x_grad); + +} // namespace phi diff --git a/paddle/phi/kernels/min_max_with_index_kernel.h b/paddle/phi/kernels/min_max_with_index_kernel.h new file mode 100644 index 00000000000000..2e5ad70feaaec4 --- /dev/null +++ b/paddle/phi/kernels/min_max_with_index_kernel.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/common/scalar.h" +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void MinWithIndexKernel(const Context& dev_ctx, + const DenseTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + DenseTensor* val_out, + DenseTensor* ind_out); + +template +void MaxWithIndexKernel(const Context& dev_ctx, + const DenseTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + DenseTensor* val_out, + DenseTensor* ind_out); + +} // namespace phi diff --git a/paddle/phi/ops/yaml/backward.yaml b/paddle/phi/ops/yaml/backward.yaml index 5364fa6ff73b9c..f75e509f6aaa22 100644 --- a/paddle/phi/ops/yaml/backward.yaml +++ b/paddle/phi/ops/yaml/backward.yaml @@ -2277,6 +2277,16 @@ kernel : func : max_pool3d_with_index_grad +- backward_op : max_with_index_grad + forward : max_with_index (Tensor x, Scalar axis, bool keepdims, bool flatten) -> Tensor(values), Tensor(indices) + args : (Tensor x, Tensor values, Tensor values_grad, Scalar axis, bool keepdims, bool flatten) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : max_with_index_grad + - backward_op : maxout_grad forward : maxout(Tensor x, int groups, int axis) -> Tensor(out) args : (Tensor x, Tensor out, Tensor out_grad, int groups, int axis) @@ -2340,6 +2350,16 @@ func : meshgrid_grad data_type : out_grad +- backward_op : min_with_index_grad + forward : min_with_index (Tensor x, Scalar axis, bool keepdims, bool flatten) -> Tensor(values), Tensor(indices) + args : (Tensor x, Tensor values, Tensor values_grad, Scalar axis, bool keepdims, bool flatten) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : min_with_index_grad + - backward_op : mish_grad forward : mish (Tensor x, float lambda) -> Tensor(out) args : (Tensor x, Tensor out_grad, float lambda) diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml index b5f4d6371a82b1..9d84600531a792 100644 --- a/paddle/phi/ops/yaml/ops.yaml +++ b/paddle/phi/ops/yaml/ops.yaml @@ -3577,6 
+3577,17 @@ backward : max_pool3d_with_index_grad interfaces : paddle::dialect::InferSymbolicShapeInterface +- op : max_with_index + args : (Tensor x, Scalar(int64_t) axis, bool keepdims = false, bool flatten = false) + output : Tensor(values), Tensor(indices) + infer_meta : + func : MinMaxWithIndexInferMeta + kernel : + func : max_with_index + data_type : x + backward : max_with_index_grad + interfaces : paddle::dialect::InferSymbolicShapeInterface, paddle::dialect::LayoutTransformationInterface + - op : maxout args : (Tensor x, int groups, int axis = 1) output : Tensor(out) @@ -3686,6 +3697,17 @@ backward : meshgrid_grad interfaces : paddle::dialect::InferSymbolicShapeInterface +- op : min_with_index + args : (Tensor x, Scalar(int64_t) axis, bool keepdims = false, bool flatten = false) + output : Tensor(values), Tensor(indices) + infer_meta : + func : MinMaxWithIndexInferMeta + kernel : + func : min_with_index + data_type : x + backward : min_with_index_grad + interfaces : paddle::dialect::InferSymbolicShapeInterface, paddle::dialect::LayoutTransformationInterface + - op : mish args : (Tensor x, float lambda) output : Tensor diff --git a/python/paddle/compat.py b/python/paddle/compat.py index 2a37393e9053f8..023fe2efcbe325 100644 --- a/python/paddle/compat.py +++ b/python/paddle/compat.py @@ -14,8 +14,10 @@ from .tensor.compat import ( Unfold, + max, + min, sort, split, ) -__all__ = ['split', 'sort', 'Unfold'] +__all__ = ['split', 'sort', 'Unfold', 'min', 'max'] diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index ad7ec15d1cfae0..0c3c613a8b83e8 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -14,7 +14,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, NamedTuple +from typing import TYPE_CHECKING, Any, NamedTuple import paddle from paddle import _C_ops @@ -223,6 +223,10 @@ class SortRetType(NamedTuple): values: Tensor indices: Tensor +class MinMaxRetType(NamedTuple): + values: Tensor + indices: Tensor + def _check_out_status( out: Tensor | tuple[Tensor, Tensor] | list[Tensor], @@ -398,3 +402,148 @@ def to_list_if_necessary(x, size_check=False): dilations=to_list_if_necessary(self.dilations), name=self.name, ) +def _min_max_param_checker(func_name: str, *args: Any, **kwargs: Any): + def invalid_arguments_exception(error_prefix=""): + type_strs = [type(v).__name__ for v in args] + type_strs.extend([f"{k}={type(v).__name__}" for k, v in kwargs.items()]) + signature = ", ".join(type_strs) + + error_msg = ( + f"Invalid arguments for `paddle.compat.{func_name}`:\n{error_prefix}" + f"Got: (paddle.Tensor input, {signature}), but expect one of:\n" + f" - (input: paddle.Tensor) for reduce_{func_name} on all dims.\n" + f" - (input: paddle.Tensor, other: paddle.Tensor) -> see paddle.{func_name}imum\n" + f" - (input: paddle.Tensor, int dim (cannot be None), bool keepdim = False)\n" + ) + return TypeError(error_msg) + + def try_get_keys(key): + res = None + try: + res = kwargs[key] + except KeyError: + raise invalid_arguments_exception() from None + return res + found_key = None + + dim_or_other = None + keepdim = False + + num_args = len(args) + total_arg_num = num_args + len(kwargs) + if total_arg_num > 2: + raise invalid_arguments_exception() + elif total_arg_num == 2: + if num_args == 2: + dim_or_other, keepdim = args + if dim_or_other is None or isinstance( + dim_or_other, (Variable, paddle.pir.Value) + ): + raise invalid_arguments_exception() + elif num_args == 1: + dim_or_other = args[0] + if dim_or_other 
is None or isinstance( + dim_or_other, (Variable, paddle.pir.Value) + ): + raise invalid_arguments_exception() + keepdim = try_get_keys("keepdim") + else: + dim_or_other = try_get_keys("dim") + keepdim = try_get_keys("keepdim") + elif total_arg_num == 1: + if num_args: + dim_or_other = args[0] + if dim_or_other is None: + raise invalid_arguments_exception() + else: + if "dim" in kwargs: + dim_or_other = kwargs["dim"] + elif "other" in kwargs: + dim_or_other = kwargs["other"] + if not isinstance(dim_or_other, (Variable, paddle.pir.Value)): + raise invalid_arguments_exception() + if dim_or_other is None: + raise invalid_arguments_exception() + + if ( + dim_or_other is not None + and not isinstance(dim_or_other, (Variable, paddle.pir.Value)) + and type(dim_or_other) is not int + ): + raise invalid_arguments_exception( + f"The second input must be int or Tensor or implicit None in compat.min, but received {type(dim_or_other)}.\n" + ) + + return dim_or_other, keepdim + + +@forbid_keywords(['x', 'axis'], 'paddle.min') +def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: + if not isinstance(input, paddle.pir.Value) and not isinstance( + input, paddle.Tensor + ): + raise TypeError( + f"input should be a tensor, but got an instance with type '{type(input).__name__}'" + ) + + dim_or_other, keepdim = _min_max_param_checker("min", *args, **kwargs) + + if dim_or_other is None: + return _C_ops.min(input, None, False) + elif isinstance(dim_or_other, int): + if input.place.is_gpu_place(): + vals, inds = _C_ops.min_with_index( + input, dim_or_other, keepdim, False + ) + inds.stop_gradient = True + return MinMaxRetType(values=vals, indices=inds) + else: + # CPUPlace and other placements are implemented by composition + indices = _C_ops.argmin( + input, dim_or_other, True, False, paddle.int64 + ) + values = _C_ops.take_along_axis(input, indices, dim_or_other) + if keepdim: + return MinMaxRetType(values=values, indices=indices) + return MinMaxRetType( + values=values.squeeze_(axis=dim_or_other), + indices=indices.squeeze_(axis=dim_or_other), + ) + else: + return _C_ops.minimum(input, dim_or_other) + + +@forbid_keywords(['x', 'axis'], 'paddle.max') +def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: + if not isinstance(input, paddle.pir.Value) and not isinstance( + input, paddle.Tensor + ): + raise TypeError( + f"input should be a tensor, but got an instance with type '{type(input).__name__}'" + ) + + dim_or_other, keepdim = _min_max_param_checker("max", *args, **kwargs) + + if dim_or_other is None: + return _C_ops.max(input, None, False) + elif isinstance(dim_or_other, int): + if input.place.is_gpu_place(): + vals, inds = _C_ops.max_with_index( + input, dim_or_other, keepdim, False + ) + inds.stop_gradient = True + return MinMaxRetType(values=vals, indices=inds) + else: + # CPUPlace and other placements are implemented by composition + indices = _C_ops.argmax( + input, dim_or_other, True, False, paddle.int64 + ) + values = _C_ops.take_along_axis(input, indices, dim_or_other) + if keepdim: + return MinMaxRetType(values=values, indices=indices) + return MinMaxRetType( + values=values.squeeze_(axis=dim_or_other), + indices=indices.squeeze_(axis=dim_or_other), + ) + else: + return _C_ops.maximum(input, dim_or_other) From 6255ed9c6925ae7c2f6bcd015cb888058dad1bc3 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Sat, 9 Aug 2025 17:16:21 +0000 Subject: [PATCH 07/24] [API-Compat] Add compat.min/max EN doc Attempting to fix integral type gradient computation 
(rejection) --- paddle/phi/kernels/gpu/reduce_kernel.cu | 53 ++++++- python/paddle/tensor/compat.py | 177 +++++++++++++++++++++++- 2 files changed, 226 insertions(+), 4 deletions(-) diff --git a/paddle/phi/kernels/gpu/reduce_kernel.cu b/paddle/phi/kernels/gpu/reduce_kernel.cu index bc4de24540346a..089cb3601f0a5b 100644 --- a/paddle/phi/kernels/gpu/reduce_kernel.cu +++ b/paddle/phi/kernels/gpu/reduce_kernel.cu @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/reduce_kernel.h" +#include #include "paddle/phi/kernels/gpu/reduce_amin_amax_common.h" #include "paddle/phi/kernels/reduce_amin_grad_kernel.h" @@ -159,7 +160,15 @@ void ReduceAMaxGradKernel(const Context& dev_ctx, dev_ctx, x, out, out_grad, dims, keep_dim, reduce_all, x_grad); } -template +template +using EnableIfInteger = + typename std::enable_if::value, int>::type; + +template +using EnableIfNonInteger = + typename std::enable_if::value, int>::type; + +template = 0> void MinWithIndexGradKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& values, @@ -174,7 +183,25 @@ void MinWithIndexGradKernel(const Context& dev_ctx, dev_ctx, x, values, values_grad, {dim_val}, keepdims, flatten, x_grad); } -template +template = 0> +void MinWithIndexGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& values, + const DenseTensor& values_grad, + const Scalar& dim, + bool keepdims, + bool flatten, + DenseTensor* x_grad) { + std::string dtype_name = phi::DataTypeToString(x.dtype()); + PADDLE_ENFORCE_EQ( + 0, + 1, + phi::errors::InvalidArgument( + "Integer type '%s' is not allowed to have stop_gradient=False.", + dtype_name.c_str())); +} + +template = 0> void MaxWithIndexGradKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& values, @@ -189,6 +216,24 @@ void MaxWithIndexGradKernel(const Context& dev_ctx, dev_ctx, x, values, values_grad, {dim_val}, keepdims, flatten, x_grad); } +template = 0> +void MaxWithIndexGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& values, + const DenseTensor& values_grad, + const Scalar& dim, + bool keepdims, + bool flatten, + DenseTensor* x_grad) { + std::string dtype_name = phi::DataTypeToString(x.dtype()); + PADDLE_ENFORCE_EQ( + 0, + 1, + phi::errors::InvalidArgument( + "Integer type '%s' is not allowed to have stop_gradient=False.", + dtype_name.c_str())); +} + template void ReduceMaxGradKernel(const Context& dev_ctx, const DenseTensor& x, @@ -320,7 +365,9 @@ PD_REGISTER_KERNEL(max_with_index_grad, phi::MaxWithIndexGradKernel, float, double, + uint8_t, int, + int16_t, int64_t, phi::dtype::float16, phi::dtype::bfloat16) {} @@ -357,7 +404,9 @@ PD_REGISTER_KERNEL(min_with_index_grad, phi::MinWithIndexGradKernel, float, double, + uint8_t, int, + int16_t, int64_t, phi::dtype::float16, phi::dtype::bfloat16) {} diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 0c3c613a8b83e8..c6cfdee988d34b 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -477,14 +477,108 @@ def try_get_keys(key): return dim_or_other, keepdim -@forbid_keywords(['x', 'axis'], 'paddle.min') +def _min_max_tensor_allow_grad(input: Tensor): + """Prevent integral input tensor type to have `stop_gradient=False`""" + in_dtype = input.dtype + if ( + in_dtype == paddle.int32 + or in_dtype == paddle.int64 + or in_dtype == paddle.uint8 + or in_dtype == paddle.int16 + ): + if not input.stop_gradient: + raise TypeError( + f"Tensors with integral type: '{in_dtype}' should stop 
gradient." + ) + + +@ForbidKeywordsDecorator( + illegal_keys=['x', 'axis'], + func_name="paddle.compat.min", + correct_name='paddle.min', +) def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: + """ + + Computes the minimum of tensor elements. There are mainly 3 cases (functionalities): + 1. paddle.compat.min(input: Tensor): reduce min over all dims, return a single value Tensor + 2. paddle.compat.min(input: Tensor, dim: int (cannot be None), keepdim=False): reduce min over the given dim, + returns a named tuple MinMaxRetType(values: Tensor, indices: Tensor) + 3. paddle.compat.min(input: Tensor, other: Tensor): see `paddle.minimum` + + Note: If there are multiple minimum elements, this API evenly distributes gradient between these equal values, + following torch.min. The gradient behavior of `values` for case 2 is the same as `paddle.amin`. + + Args: + input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64. + dim (int, optional): The dim along which the minimum is computed. + If this is not specified: see case 1, note that: `None` cannot be passed to this (TypeError will be thrown) + compute the minimum over all elements of `input` and return a Tensor with a single element, + otherwise must be in the range :math:`[-input.ndim, input.ndim)`. + If :math:`dim < 0`, the axis to reduce is :math:`input.ndim + dim`. + keepdim (bool, optional): Whether to reserve the reduced dimension in the + output Tensor. The result tensor will have one fewer dimension + than the `input` unless :attr:`keepdim` is true, default + value is False. Note that if `dim` does not appear in neither (*args) or (**kwargs), this parameter cannot be passed alone + other (Tensor, optional): the other tensor to perform `paddle.minimum` with. This Tensor should + have the same or broadcast-able shape as the `input`. Note that (`dim` & `keepdim`) and `other` are mutually exclusive + meaning that trying to composite both will result in TypeError + + Returns: + - For case 1: a single value Tensor (0-dim) + - For case 2: a named tuple MinMaxRetType(values: Tensor, indices: Tensor), `values` has the same data type as the `input`, + while indices is always an int64 Tensor, with exactly the same shape as `values`. + MinMaxRetType can be used (indexed, packed, unpacked) in the same way as a regular tuple + - For case 3: see `paddle.minimum` + + + Examples: + .. code-block:: python + + >>> import paddle + + >>> # data_x is a Tensor with shape [2, 4] + >>> # the axis is a int element + >>> x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9], + ... [0.1, 0.2, 0.6, 0.7]], + ... dtype='float64', stop_gradient=False) + >>> # Case 1: reduce over all dims + >>> result1 = paddle.compat.min(x) + >>> result1 + Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=False, + 0.10000000) + + >>> # Case 2: reduce over specified dim + >>> x.clear_grad() + >>> result2 = paddle.compat.min(x, dim=1) + >>> result2 + MinMaxRetType(values=Tensor(shape=[2], dtype=float64, place=Place(gpu:0), stop_gradient=False, + [0.20000000, 0.10000000]), indices=Tensor(shape=[2], dtype=int64, place=Place(gpu:0), stop_gradient=True, + [0, 0])) + >>> result2[0].backward() + >>> x.grad + Tensor(shape=[2, 4], dtype=float64, place=Place(gpu:0), stop_gradient=False, + [[1., 0., 0., 0.], + [1., 0., 0., 0.]]) + + >>> # Case 3: equivalent to `paddle.minimum` + >>> x.clear_grad() + >>> y = paddle.to_tensor([[0.5, 0.4, 0.1, 0.2], + ... [0.3, 0.1, 0.6, 0.7]], + ... 
dtype='float64', stop_gradient=False) + >>> result3 = paddle.compat.min(x, y) + >>> result3 + Tensor(shape=[2, 4], dtype=float64, place=Place(gpu:0), stop_gradient=False, + [[0.20000000, 0.30000000, 0.10000000, 0.20000000], + [0.10000000, 0.10000000, 0.60000000, 0.70000000]]) + """ if not isinstance(input, paddle.pir.Value) and not isinstance( input, paddle.Tensor ): raise TypeError( f"input should be a tensor, but got an instance with type '{type(input).__name__}'" ) + _min_max_tensor_allow_grad(input) dim_or_other, keepdim = _min_max_param_checker("min", *args, **kwargs) @@ -513,14 +607,93 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: return _C_ops.minimum(input, dim_or_other) -@forbid_keywords(['x', 'axis'], 'paddle.max') +@ForbidKeywordsDecorator( + illegal_keys=['x', 'axis'], + func_name="paddle.compat.max", + correct_name='paddle.max', +) def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: + """ + + Computes the maximum of tensor elements. There are mainly 3 cases (functionalities): + 1. paddle.compat.max(input: Tensor): reduce max over all dims, return a single value Tensor + 2. paddle.compat.max(input: Tensor, dim: int (cannot be None), keepdim=False): reduce max over the given dim, + returns a named tuple MinMaxRetType(values: Tensor, indices: Tensor) + 3. paddle.compat.max(input: Tensor, other: Tensor): see `paddle.maximum` + + Note: If there are multiple maximum elements, this API evenly distributes gradient between these equal values, + following torch.max. The gradient behavior of `values` for case 2 is the same as `paddle.amax`. + + Args: + input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64. + dim (int, optional): The dim along which the maximum is computed. + If this is not specified: see case 1, note that: `None` cannot be passed to this (TypeError will be thrown) + compute the maximum over all elements of `input` and return a Tensor with a single element, + otherwise must be in the range :math:`[-input.ndim, input.ndim)`. + If :math:`dim < 0`, the axis to reduce is :math:`input.ndim + dim`. + keepdim (bool, optional): Whether to reserve the reduced dimension in the + output Tensor. The result tensor will have one fewer dimension + than the `input` unless :attr:`keepdim` is true, default + value is False. Note that if `dim` does not appear in neither (*args) or (**kwargs), this parameter cannot be passed alone + other (Tensor, optional): the other tensor to perform `paddle.maximum` with. This Tensor should + have the same or broadcast-able shape as the `input`. Note that (`dim` & `keepdim`) and `other` are mutually exclusive + meaning that trying to composite both will result in TypeError + + Returns: + - For case 1: a single value Tensor (0-dim) + - For case 2: a named tuple MinMaxRetType(values: Tensor, indices: Tensor), `values` has the same data type as the `input`, + while indices is always an int64 Tensor, with exactly the same shape as `values`. + MinMaxRetType can be used (indexed, packed, unpacked) in the same way as a regular tuple + - For case 3: see `paddle.maximum` + + + Examples: + .. code-block:: python + + >>> import paddle + + >>> # data_x is a Tensor with shape [2, 4] + >>> # the axis is a int element + >>> x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9], + ... [0.1, 0.2, 0.6, 0.7]], + ... 
dtype='float64', stop_gradient=False) + >>> # Case 1: reduce over all dims + >>> result1 = paddle.compat.max(x) + >>> result1 + Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=False, + 0.90000000) + + >>> # Case 2: reduce over specified dim + >>> x.clear_grad() + >>> result2 = paddle.compat.max(x, dim=1) + >>> result2 + MinMaxRetType(values=Tensor(shape=[2], dtype=float64, place=Place(gpu:0), stop_gradient=False, + [0.90000000, 0.70000000]), indices=Tensor(shape=[2], dtype=int64, place=Place(gpu:0), stop_gradient=True, + [3, 3])) + >>> result2[0].backward() + >>> x.grad + Tensor(shape=[2, 4], dtype=float64, place=Place(gpu:0), stop_gradient=False, + [[0., 0., 0., 1.], + [0., 0., 0., 1.]]) + + >>> # Case 3: equivalent to `paddle.maximum` + >>> x.clear_grad() + >>> y = paddle.to_tensor([[0.5, 0.4, 0.1, 0.2], + ... [0.3, 0.1, 0.6, 0.7]], + ... dtype='float64', stop_gradient=False) + >>> result3 = paddle.compat.max(x, y) + >>> result3 + Tensor(shape=[2, 4], dtype=float64, place=Place(gpu:0), stop_gradient=False, + [[0.50000000, 0.40000000, 0.50000000, 0.90000000], + [0.30000000, 0.20000000, 0.60000000, 0.70000000]]) + """ if not isinstance(input, paddle.pir.Value) and not isinstance( input, paddle.Tensor ): raise TypeError( f"input should be a tensor, but got an instance with type '{type(input).__name__}'" ) + _min_max_tensor_allow_grad(input) dim_or_other, keepdim = _min_max_param_checker("max", *args, **kwargs) From 6fa8807ed77c48646afd050783e51cd4e27dee4d Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Sun, 10 Aug 2025 04:47:00 +0000 Subject: [PATCH 08/24] [WIP][API-Compat] Add dyna-graph unittests for min/max --- .../pir/dialect/op_generator/op_build_gen.py | 1 + test/legacy_test/test_compat_minmax.py | 255 ++++++++++++++++++ 2 files changed, 256 insertions(+) create mode 100644 test/legacy_test/test_compat_minmax.py diff --git a/paddle/fluid/pir/dialect/op_generator/op_build_gen.py b/paddle/fluid/pir/dialect/op_generator/op_build_gen.py index f8510480b2fca4..60840cc60ec5e9 100644 --- a/paddle/fluid/pir/dialect/op_generator/op_build_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/op_build_gen.py @@ -135,6 +135,7 @@ 'KthvalueInferMeta', 'MaxPoolWithIndexInferMeta', 'MaxPoolV2InferMeta', + 'MinMaxWithIndexInferMeta', 'MultinomialInferMeta', 'OverlapAddInferMeta', 'PadInferMeta', diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py new file mode 100644 index 00000000000000..1db80522e7b7f5 --- /dev/null +++ b/test/legacy_test/test_compat_minmax.py @@ -0,0 +1,255 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
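For reference while reading the tests below: when a dim is given, compat.min / compat.max return (values, indices) along that dim, which behaves roughly like this NumPy sketch (an approximation; tie-breaking details aside):

    import numpy as np

    def compat_min_ref(x, dim, keepdim=False):
        indices = np.argmin(x, axis=dim)  # indices along dim
        values = np.take_along_axis(x, np.expand_dims(indices, dim), axis=dim)
        if keepdim:
            indices = np.expand_dims(indices, dim)
        else:
            values = np.squeeze(values, axis=dim)
        return values, indices.astype(np.int64)

    # compat_max_ref is the same sketch with np.argmax.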
+ +import unittest + +import numpy as np + +import paddle + + +class TestCompatMinMax(unittest.TestCase): + def setUp(self): + """Make sure we are in a dynamic graph env""" + paddle.disable_static() + + def test_case1_simple_reduce_all(self): + data = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]], dtype='float32') + min_val = paddle.compat.min(data) + max_val = paddle.compat.max(data) + + self.assertAlmostEqual(min_val.item(), 1.0) + self.assertAlmostEqual(max_val.item(), 4.0) + + data = paddle.to_tensor( + [[1.0, 1.0], [2.0, 3.0]], dtype='float32', stop_gradient=False + ) + min_val = paddle.compat.min(data) + min_val.backward() + + expected_grad = np.array([[0.5, 0.5], [0.0, 0.0]]) + np.testing.assert_allclose(data.grad.numpy(), expected_grad) + + def test_case2_reduce_dim(self): + """Test dim/keepdim""" + data = paddle.to_tensor( + [[[5, 8], [2, 1]], [[7, 3], [9, 6]]], dtype='float32' + ) + + min_result = paddle.compat.min(data, dim=1) + self.assertEqual(min_result.values.shape, [2, 2]) + np.testing.assert_array_equal( + min_result.values.numpy(), np.array([[2, 1], [7, 3]]) + ) + np.testing.assert_array_equal( + min_result.indices.numpy(), np.array([[1, 1], [0, 0]]) + ) + + max_result = paddle.compat.max(data, dim=2) + self.assertEqual(max_result.values.shape, [2, 2]) + np.testing.assert_array_equal( + max_result.values.numpy(), np.array([[8, 2], [7, 9]]) + ) + np.testing.assert_array_equal( + max_result.indices.numpy(), np.array([[1, 0], [0, 0]]) + ) + + min_result_keep = paddle.compat.min(data, dim=0, keepdim=True) + self.assertEqual(min_result_keep.values.shape, [1, 2, 2]) + np.testing.assert_array_equal( + min_result_keep.values.numpy(), np.array([[[5, 3], [2, 1]]]) + ) + + min_result_neg = paddle.compat.min(data, dim=-2) + np.testing.assert_array_equal( + min_result_neg.values.numpy(), min_result.values.numpy() + ) + + def test_case2_grad(self): + data = paddle.to_tensor( + [[[1.0, 2.0], [1.0, 3.0]], [[4.0, 1.0], [5.0, 1.0]]], + dtype='float32', + stop_gradient=False, + ) + y = data * 2 + + min_result = paddle.compat.min(y, dim=2) + min_result.values.backward() + + expected_grad = np.array( + [[[2.0, 0.0], [2.0, 0.0]], [[0.0, 2.0], [0.0, 2.0]]] + ) + np.testing.assert_allclose(data.grad.numpy(), expected_grad, atol=1e-6) + + def test_case3_elementwise(self): + """minimum/maximum""" + x = paddle.to_tensor([[1, 5], [4, 2]], dtype='float32') + y = paddle.to_tensor([[3, 2], [1, 6]], dtype='float32') + + min_result = paddle.compat.min(x, y) + np.testing.assert_array_equal( + min_result.numpy(), np.array([[1, 2], [1, 2]]) + ) + + max_result = paddle.compat.max(x, y) + np.testing.assert_array_equal( + max_result.numpy(), np.array([[3, 5], [4, 6]]) + ) + + z = paddle.to_tensor([3, 4], dtype='float32') + broadcast_min = paddle.compat.min(x, z) + np.testing.assert_array_equal( + broadcast_min.numpy(), np.array([[1, 4], [3, 2]]) + ) + + def test_case3_grad(self): + x = paddle.to_tensor( + [[1.0, 2.0], [3.0, 4.0]], dtype=paddle.float16, stop_gradient=False + ) + y = paddle.to_tensor( + [[0.5, 2.5], [2.0, 3.5]], dtype=paddle.float16, stop_gradient=False + ) + + min_val = paddle.compat.min(x, y) + min_val.backward() + + expected_x_grad = np.array([[0.0, 1.0], [0.0, 0.0]]) + np.testing.assert_allclose(x.grad.numpy(), expected_x_grad) + + expected_y_grad = np.array([[1.0, 0.0], [1.0, 1.0]]) + np.testing.assert_allclose(y.grad.numpy(), expected_y_grad) + + def test_edge_cases(self): + """Edge cases test""" + # uniform distributed gradient + uniform_data = paddle.ones([2, 3], dtype='float64') + 
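+        # all elements along dim 0 are equal, so the gradient below is split evenly (0.5 each)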
uniform_data.stop_gradient = False + min_val = paddle.compat.min(uniform_data, 0) + min_val.values.sum().backward() + + expected_grad = np.full((2, 3), 0.5) + np.testing.assert_allclose(uniform_data.grad.numpy(), expected_grad) + + # 0-dim tensor + dim0_tensor = paddle.to_tensor(2, dtype='float32') + max_val = paddle.compat.max(dim0_tensor) + np.testing.assert_allclose( + max_val.numpy(), np.array(2.0, dtype=np.float32) + ) + + # 1-dim tensor + dim1_tensor = paddle.to_tensor([1], dtype='uint8') + max_val = paddle.compat.max(dim1_tensor, dim=-1, keepdim=True) + np.testing.assert_array_equal( + max_val[0].numpy(), np.array([1], dtype=np.uint8) + ) + np.testing.assert_array_equal( + max_val[1].numpy(), np.array([0], dtype=np.int64) + ) + + def test_compare_with_index_ops_to_origin(self): + dtypes = ['float32', 'float64', 'bfloat16', 'float16', 'int32', 'int64'] + + for i, dtype in enumerate(dtypes): + data = paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype=dtype) + min_vals_inds = paddle.compat.min(data, dim=0) + self.assertEqual(min_vals_inds.values.dtype, data.dtype) + self.assertEqual(min_vals_inds.indices.dtype, paddle.int64) + + origin_values = paddle.min(data, axis=0) + origin_indices = paddle.argmin(data, axis=0, dtype="int64") + if i < 4: # floating point + np.testing.assert_allclose( + min_vals_inds.values.numpy(), origin_values.numpy() + ) + else: + np.testing.assert_array_equal( + min_vals_inds.values.numpy(), origin_values.numpy() + ) + np.testing.assert_array_equal( + min_vals_inds[1].numpy(), origin_indices.numpy() + ) + + def test_error_handling(self): + """Test whether correct exception will be thrown. Skip error messages (some of them are long)""" + + err_msg1 = ( + "Tensors with integral type: 'paddle.int32' should stop gradient." + ) + + # empty tensor + empty_tensor = paddle.to_tensor([], dtype='float32') + with self.assertRaises(ValueError): + paddle.compat.min(empty_tensor) + + # mixed parameters case 1 + input_ts = paddle.to_tensor([1, 2, 3], dtype='float32') + other_ts = paddle.to_tensor([1]) + with self.assertRaises(TypeError): + paddle.compat.min(input_ts, other=other_ts, dim=0) + + # mixed parameters case 2 + with self.assertRaises(TypeError): + paddle.compat.min(input_ts, 0, other=other_ts) + + # trying to perform grad ops for integral types + with self.assertRaises(TypeError) as cm: + tensor = paddle.ones([2, 2], dtype=paddle.int32) + tensor.stop_gradient = False + tensors = paddle.compat.max(tensor, dim=0) + self.assertEqual(str(cm.exception), err_msg1) + + # explicit None case 1 + with self.assertRaises(TypeError) as cm: + paddle.compat.min(input_ts, dim=None) + + # explicit None case 2 + with self.assertRaises(TypeError) as cm: + paddle.compat.min(input_ts, None, keepdim=True) + + # keepdim specified without specifying dim + with self.assertRaises(TypeError) as cm: + paddle.compat.min(input_ts, keepdim=True) + + # Wrong *args specification case 1 + with self.assertRaises(TypeError) as cm: + paddle.compat.min(input_ts, False) + + # Wrong *args specification case 2 + with self.assertRaises(TypeError) as cm: + paddle.compat.min(input_ts, other_ts, True) + + # Tensor input for dim case 1 + with self.assertRaises(TypeError) as cm: + paddle.compat.min(input_ts, dim=paddle.to_tensor([0])) + + # Tensor input for dim case 2 + with self.assertRaises(TypeError) as cm: + paddle.compat.min(input_ts, dim=paddle.to_tensor(0)) + + # Duplicate Arguments case 1 + with self.assertRaises(TypeError) as cm: + paddle.compat.max(input_ts, 0, dim=0) + + # Duplicate Arguments case 2 + 
with self.assertRaises(TypeError) as cm: + paddle.compat.max(input_ts, other_ts, other=0) + + # Duplicate Arguments case 3 + with self.assertRaises(TypeError) as cm: + paddle.compat.max(input_ts, dim=0, other=0, keepdim=True) + + +if __name__ == '__main__': + unittest.main() From adb4c253ac77956757ae77bad48dfa1f348e3761 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Sun, 10 Aug 2025 08:19:17 +0000 Subject: [PATCH 09/24] [WIP][API-Compat] Fixed CPU failure --- python/paddle/tensor/compat.py | 12 ++++-------- python/paddle/tensor/math.py | 12 ++++++++++++ test/legacy_test/test_compat_minmax.py | 18 ++++++++++++++++++ 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index c6cfdee988d34b..9df69ca7e24d28 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -593,10 +593,8 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: return MinMaxRetType(values=vals, indices=inds) else: # CPUPlace and other placements are implemented by composition - indices = _C_ops.argmin( - input, dim_or_other, True, False, paddle.int64 - ) - values = _C_ops.take_along_axis(input, indices, dim_or_other) + indices = paddle.argmin(input, axis=dim_or_other, keepdim=True) + values = paddle.take_along_axis(input, indices, axis=dim_or_other) if keepdim: return MinMaxRetType(values=values, indices=indices) return MinMaxRetType( @@ -708,10 +706,8 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: return MinMaxRetType(values=vals, indices=inds) else: # CPUPlace and other placements are implemented by composition - indices = _C_ops.argmax( - input, dim_or_other, True, False, paddle.int64 - ) - values = _C_ops.take_along_axis(input, indices, dim_or_other) + indices = paddle.argmax(input, axis=dim_or_other, keepdim=True) + values = paddle.take_along_axis(input, indices, axis=dim_or_other) if keepdim: return MinMaxRetType(values=values, indices=indices) return MinMaxRetType( diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 1f84b1d6067e4f..da5eedfe992c26 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -110,6 +110,8 @@ from paddle import Tensor from paddle._typing import DTypeLike +from paddle.utils.decorator_utils import ForbidKeywordsDecorator + __all__ = [] _supported_int_dtype_ = [ @@ -3272,6 +3274,11 @@ def _check_input(x): return out +@ForbidKeywordsDecorator( + illegal_keys=["input", "dim", "other"], + func_name="paddle.max", + correct_name="paddle.compat.max", +) def max( x: Tensor, axis: int | Sequence[int] | None = None, @@ -3431,6 +3438,11 @@ def max( return out +@ForbidKeywordsDecorator( + illegal_keys=["input", "dim", "other"], + func_name="paddle.min", + correct_name="paddle.compat.min", +) def min( x: Tensor, axis: int | Sequence[int] | None = None, diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py index 1db80522e7b7f5..5adf80fecbf5d8 100644 --- a/test/legacy_test/test_compat_minmax.py +++ b/test/legacy_test/test_compat_minmax.py @@ -187,6 +187,14 @@ def test_error_handling(self): err_msg1 = ( "Tensors with integral type: 'paddle.int32' should stop gradient." ) + err_msg2 = ( + "paddle.min() received unexpected keyword arguments 'input', 'dim'. " + "\nDid you mean to use paddle.compat.min() instead?" + ) + err_msg3 = ( + "paddle.compat.max() received unexpected keyword argument 'axis'. " + "\nDid you mean to use paddle.max() instead?" 
+ ) # empty tensor empty_tensor = paddle.to_tensor([], dtype='float32') @@ -250,6 +258,16 @@ def test_error_handling(self): with self.assertRaises(TypeError) as cm: paddle.compat.max(input_ts, dim=0, other=0, keepdim=True) + # Wrong API used case 1 + with self.assertRaises(TypeError) as cm: + paddle.min(input=input_ts, dim=0) + self.assertEqual(str(cm.exception), err_msg2) + + # Wrong API used case 2 + with self.assertRaises(TypeError) as cm: + paddle.compat.max(input_ts, axis=0) + self.assertEqual(str(cm.exception), err_msg3) + if __name__ == '__main__': unittest.main() From fd6adf01e125cc59cea26b517b765959f95001ad Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Sun, 10 Aug 2025 16:35:43 +0000 Subject: [PATCH 10/24] [API-Compat] Correct min/max_with index gradient behavior --- .../gpu/min_max_with_index_grad_kernel.cu | 115 ++++++++++++++++++ .../kernels/gpu/min_max_with_index_kernel.cu | 12 +- paddle/phi/kernels/gpu/reduce_kernel.cu | 100 --------------- .../min_max_with_index_grad_kernel.h.h | 42 ------- .../phi/kernels/min_max_with_index_kernel.h | 8 +- paddle/phi/ops/yaml/backward.yaml | 8 +- paddle/phi/ops/yaml/ops.yaml | 4 +- python/paddle/tensor/compat.py | 23 +++- test/legacy_test/test_compat_minmax.py | 49 +++++++- 9 files changed, 195 insertions(+), 166 deletions(-) create mode 100644 paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu delete mode 100644 paddle/phi/kernels/min_max_with_index_grad_kernel.h.h diff --git a/paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu b/paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu new file mode 100644 index 00000000000000..f34d03bf07e506 --- /dev/null +++ b/paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu @@ -0,0 +1,115 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/utils/data_type.h" +#include "paddle/phi/kernels/funcs/gather_scatter_functor.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +namespace phi { + +template +using EnableIfInteger = + typename std::enable_if::value, int>::type; + +template +using EnableIfNonInteger = + typename std::enable_if::value, int>::type; + +// Here if keepdim=True, this will fallback to a simplified version of +// take_along_axis. 
However, if keepdim=False (by default), indices will +// not have equal rank will the input values (and values_grad), therefore +// needs an unsqueeze operation by shallow copying indices and Resize +#define DEFINE_WITH_INDEX_GRAD_KERNEL(OpType) \ + template = 0> \ + void OpType##WithIndexGradKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& values, \ + const DenseTensor& indices, \ + const DenseTensor& values_grad, \ + const Scalar& dim, \ + bool keepdim, \ + DenseTensor* x_grad) { \ + x_grad->Resize(x.dims()); \ + dev_ctx.template Alloc(x_grad); \ + if (x_grad->numel() == 0) { \ + return; \ + } \ + int64_t dim_val = dim.to(); \ + if (dim_val < 0) { \ + dim_val += x.dims().size(); \ + } \ + DenseTensor shallow_copied_inds(indices); \ + if (!keepdim) { \ + auto indices_dim = x.dims(); \ + indices_dim[dim_val] = 1; \ + shallow_copied_inds.Resize(indices_dim); \ + } \ + phi::funcs::SetConstant functor; \ + functor(dev_ctx, x_grad, static_cast(0)); \ + phi::funcs::gpu_scatter_add_kernel( \ + *x_grad, dim_val, shallow_copied_inds, values_grad, true, dev_ctx); \ + } \ + template = 0> \ + void OpType##WithIndexGradKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& values, \ + const DenseTensor& indices, \ + const DenseTensor& values_grad, \ + const Scalar& dim, \ + bool keepdim, \ + DenseTensor* x_grad) { \ + std::string dtype_name = phi::DataTypeToString(values.dtype()); \ + PADDLE_ENFORCE_EQ( \ + 0, \ + 1, \ + phi::errors::InvalidArgument( \ + "Integer type '%s' is not allowed to have stop_gradient=False.", \ + dtype_name.c_str())); \ + } + +DEFINE_WITH_INDEX_GRAD_KERNEL(Max) +DEFINE_WITH_INDEX_GRAD_KERNEL(Min) + +#undef DEFINE_WITH_INDEX_GRAD_KERNEL + +} // namespace phi + +PD_REGISTER_KERNEL(max_with_index_grad, + GPU, + ALL_LAYOUT, + phi::MaxWithIndexGradKernel, + float, + double, + uint8_t, + int, + int16_t, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} + +PD_REGISTER_KERNEL(min_with_index_grad, + GPU, + ALL_LAYOUT, + phi::MinWithIndexGradKernel, + float, + double, + uint8_t, + int, + int16_t, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu b/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu index 57699f2f97e83e..2509c34fb0c8fd 100644 --- a/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu +++ b/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu @@ -254,25 +254,25 @@ void MinMaxWithIndexOpCUDAKernel(const Context& dev_ctx, template void MinWithIndexKernel(const Context& dev_ctx, const DenseTensor& x, - const Scalar& axis, - bool keepdims, + const Scalar& dim, + bool keepdim, bool flatten, DenseTensor* val_out, DenseTensor* ind_out) { MinMaxWithIndexOpCUDAKernel( - dev_ctx, x, axis, keepdims, flatten, val_out, ind_out); + dev_ctx, x, dim, keepdim, flatten, val_out, ind_out); } template void MaxWithIndexKernel(const Context& dev_ctx, const DenseTensor& x, - const Scalar& axis, - bool keepdims, + const Scalar& dim, + bool keepdim, bool flatten, DenseTensor* val_out, DenseTensor* ind_out) { MinMaxWithIndexOpCUDAKernel( - dev_ctx, x, axis, keepdims, flatten, val_out, ind_out); + dev_ctx, x, dim, keepdim, flatten, val_out, ind_out); } #endif diff --git a/paddle/phi/kernels/gpu/reduce_kernel.cu b/paddle/phi/kernels/gpu/reduce_kernel.cu index 089cb3601f0a5b..3f55297474015c 100644 --- a/paddle/phi/kernels/gpu/reduce_kernel.cu +++ b/paddle/phi/kernels/gpu/reduce_kernel.cu @@ -160,80 +160,6 @@ void ReduceAMaxGradKernel(const Context& dev_ctx, 
dev_ctx, x, out, out_grad, dims, keep_dim, reduce_all, x_grad); } -template -using EnableIfInteger = - typename std::enable_if::value, int>::type; - -template -using EnableIfNonInteger = - typename std::enable_if::value, int>::type; - -template = 0> -void MinWithIndexGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& values, - const DenseTensor& values_grad, - const Scalar& dim, - bool keepdims, - bool flatten, - DenseTensor* x_grad) { - int64_t dim_val = dim.to(); - flatten = recompute_reduce_all(x, {dim_val}, flatten); - ReduceCudaAMaxAMinGrad( - dev_ctx, x, values, values_grad, {dim_val}, keepdims, flatten, x_grad); -} - -template = 0> -void MinWithIndexGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& values, - const DenseTensor& values_grad, - const Scalar& dim, - bool keepdims, - bool flatten, - DenseTensor* x_grad) { - std::string dtype_name = phi::DataTypeToString(x.dtype()); - PADDLE_ENFORCE_EQ( - 0, - 1, - phi::errors::InvalidArgument( - "Integer type '%s' is not allowed to have stop_gradient=False.", - dtype_name.c_str())); -} - -template = 0> -void MaxWithIndexGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& values, - const DenseTensor& values_grad, - const Scalar& dim, - bool keepdims, - bool flatten, - DenseTensor* x_grad) { - int64_t dim_val = dim.to(); - flatten = recompute_reduce_all(x, {dim_val}, flatten); - ReduceCudaAMaxAMinGrad( - dev_ctx, x, values, values_grad, {dim_val}, keepdims, flatten, x_grad); -} - -template = 0> -void MaxWithIndexGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& values, - const DenseTensor& values_grad, - const Scalar& dim, - bool keepdims, - bool flatten, - DenseTensor* x_grad) { - std::string dtype_name = phi::DataTypeToString(x.dtype()); - PADDLE_ENFORCE_EQ( - 0, - 1, - phi::errors::InvalidArgument( - "Integer type '%s' is not allowed to have stop_gradient=False.", - dtype_name.c_str())); -} - template void ReduceMaxGradKernel(const Context& dev_ctx, const DenseTensor& x, @@ -359,19 +285,6 @@ PD_REGISTER_KERNEL(max_grad, phi::dtype::float16, phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(max_with_index_grad, - GPU, - ALL_LAYOUT, - phi::MaxWithIndexGradKernel, - float, - double, - uint8_t, - int, - int16_t, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16) {} - PD_REGISTER_KERNEL(mean_grad, GPU, ALL_LAYOUT, @@ -398,19 +311,6 @@ PD_REGISTER_KERNEL(min_grad, phi::dtype::float16, phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(min_with_index_grad, - GPU, - ALL_LAYOUT, - phi::MinWithIndexGradKernel, - float, - double, - uint8_t, - int, - int16_t, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16) {} - PD_REGISTER_KERNEL(sum_grad, GPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/min_max_with_index_grad_kernel.h.h b/paddle/phi/kernels/min_max_with_index_grad_kernel.h.h deleted file mode 100644 index 30c4fd34bb281e..00000000000000 --- a/paddle/phi/kernels/min_max_with_index_grad_kernel.h.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/phi/common/scalar.h" -#include "paddle/phi/core/dense_tensor.h" - -namespace phi { - -template -void MaxWithIndexGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& values, - const DenseTensor& values_grad, - const Scalar& dim, - bool keepdims, - bool flatten, - DenseTensor* x_grad); - -template -void MinWithIndexGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& values, - const DenseTensor& values_grad, - const Scalar& dim, - bool keepdims, - bool flatten, - DenseTensor* x_grad); - -} // namespace phi diff --git a/paddle/phi/kernels/min_max_with_index_kernel.h b/paddle/phi/kernels/min_max_with_index_kernel.h index 2e5ad70feaaec4..eca50fc3a752e8 100644 --- a/paddle/phi/kernels/min_max_with_index_kernel.h +++ b/paddle/phi/kernels/min_max_with_index_kernel.h @@ -22,8 +22,8 @@ namespace phi { template void MinWithIndexKernel(const Context& dev_ctx, const DenseTensor& x, - const Scalar& axis, - bool keepdims, + const Scalar& dim, + bool keepdim, bool flatten, DenseTensor* val_out, DenseTensor* ind_out); @@ -31,8 +31,8 @@ void MinWithIndexKernel(const Context& dev_ctx, template void MaxWithIndexKernel(const Context& dev_ctx, const DenseTensor& x, - const Scalar& axis, - bool keepdims, + const Scalar& dim, + bool keepdim, bool flatten, DenseTensor* val_out, DenseTensor* ind_out); diff --git a/paddle/phi/ops/yaml/backward.yaml b/paddle/phi/ops/yaml/backward.yaml index f75e509f6aaa22..154b99e557fabf 100644 --- a/paddle/phi/ops/yaml/backward.yaml +++ b/paddle/phi/ops/yaml/backward.yaml @@ -2278,8 +2278,8 @@ func : max_pool3d_with_index_grad - backward_op : max_with_index_grad - forward : max_with_index (Tensor x, Scalar axis, bool keepdims, bool flatten) -> Tensor(values), Tensor(indices) - args : (Tensor x, Tensor values, Tensor values_grad, Scalar axis, bool keepdims, bool flatten) + forward : max_with_index (Tensor x, Scalar dim, bool keepdim, bool flatten) -> Tensor(values), Tensor(indices) + args : (Tensor x, Tensor values, Tensor indices, Tensor values_grad, Scalar dim, bool keepdim) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta @@ -2351,8 +2351,8 @@ data_type : out_grad - backward_op : min_with_index_grad - forward : min_with_index (Tensor x, Scalar axis, bool keepdims, bool flatten) -> Tensor(values), Tensor(indices) - args : (Tensor x, Tensor values, Tensor values_grad, Scalar axis, bool keepdims, bool flatten) + forward : min_with_index (Tensor x, Scalar dim, bool keepdim, bool flatten) -> Tensor(values), Tensor(indices) + args : (Tensor x, Tensor values, Tensor indices, Tensor values_grad, Scalar dim, bool keepdim) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml index 9d84600531a792..78dcca6d579589 100644 --- a/paddle/phi/ops/yaml/ops.yaml +++ b/paddle/phi/ops/yaml/ops.yaml @@ -3578,7 +3578,7 @@ interfaces : paddle::dialect::InferSymbolicShapeInterface - op : max_with_index - args : (Tensor x, Scalar(int64_t) axis, bool keepdims = false, bool flatten = false) + args : (Tensor 
x, Scalar(int64_t) dim, bool keepdim = false, bool flatten = false) output : Tensor(values), Tensor(indices) infer_meta : func : MinMaxWithIndexInferMeta @@ -3698,7 +3698,7 @@ interfaces : paddle::dialect::InferSymbolicShapeInterface - op : min_with_index - args : (Tensor x, Scalar(int64_t) axis, bool keepdims = false, bool flatten = false) + args : (Tensor x, Scalar(int64_t) dim, bool keepdim = false, bool flatten = false) output : Tensor(values), Tensor(indices) infer_meta : func : MinMaxWithIndexInferMeta diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 9df69ca7e24d28..2ff268d916a2d6 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -492,6 +492,21 @@ def _min_max_tensor_allow_grad(input: Tensor): ) +def _min_max_allow_cpu_composite(input: Tensor): + """paddle.min/argmin(max/argmax), paddle.take_along_axis reject the following types""" + in_dtype = input.dtype + if ( + in_dtype == paddle.float16 + or in_dtype == paddle.bfloat16 + or in_dtype == paddle.int16 + ): + if not input.place.is_gpu_place(): + raise TypeError( + f"Non-CUDA GPU placed Tensor does not have '{in_dtype}' op registered.\n" + "Paddle support following DataTypes: int32, int64, float64, float32, uint8" + ) + + @ForbidKeywordsDecorator( illegal_keys=['x', 'axis'], func_name="paddle.compat.min", @@ -510,7 +525,8 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: following torch.min. The gradient behavior of `values` for case 2 is the same as `paddle.amin`. Args: - input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64. + input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64 on GPU. + uint8, int32, int64, float32, float64 are allowed on CPU. dim (int, optional): The dim along which the minimum is computed. If this is not specified: see case 1, note that: `None` cannot be passed to this (TypeError will be thrown) compute the minimum over all elements of `input` and return a Tensor with a single element, @@ -592,6 +608,7 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: inds.stop_gradient = True return MinMaxRetType(values=vals, indices=inds) else: + _min_max_allow_cpu_composite(input) # CPUPlace and other placements are implemented by composition indices = paddle.argmin(input, axis=dim_or_other, keepdim=True) values = paddle.take_along_axis(input, indices, axis=dim_or_other) @@ -623,7 +640,8 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: following torch.max. The gradient behavior of `values` for case 2 is the same as `paddle.amax`. Args: - input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64. + input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64 on GPU. + uint8, int32, int64, float32, float64 are allowed on CPU. dim (int, optional): The dim along which the maximum is computed. 
If this is not specified: see case 1, note that: `None` cannot be passed to this (TypeError will be thrown) compute the maximum over all elements of `input` and return a Tensor with a single element, @@ -705,6 +723,7 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: inds.stop_gradient = True return MinMaxRetType(values=vals, indices=inds) else: + _min_max_allow_cpu_composite(input) # CPUPlace and other placements are implemented by composition indices = paddle.argmax(input, axis=dim_or_other, keepdim=True) values = paddle.take_along_axis(input, indices, axis=dim_or_other) diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py index 5adf80fecbf5d8..4bf3f7ebed52f6 100644 --- a/test/legacy_test/test_compat_minmax.py +++ b/test/legacy_test/test_compat_minmax.py @@ -92,6 +92,15 @@ def test_case2_grad(self): ) np.testing.assert_allclose(data.grad.numpy(), expected_grad, atol=1e-6) + data.clear_grad() + y = data * data + min_result = paddle.compat.min(y, dim=1) + min_result[0].backward() + expected_grad = np.array( + [[[2.0, 4.0], [0.0, 0.0]], [[8.0, 2.0], [0.0, 0.0]]] + ) + np.testing.assert_allclose(data.grad.numpy(), expected_grad, atol=1e-6) + def test_case3_elementwise(self): """minimum/maximum""" x = paddle.to_tensor([[1, 5], [4, 2]], dtype='float32') @@ -115,10 +124,10 @@ def test_case3_elementwise(self): def test_case3_grad(self): x = paddle.to_tensor( - [[1.0, 2.0], [3.0, 4.0]], dtype=paddle.float16, stop_gradient=False + [[1.0, 2.0], [3.0, 4.0]], dtype=paddle.float32, stop_gradient=False ) y = paddle.to_tensor( - [[0.5, 2.5], [2.0, 3.5]], dtype=paddle.float16, stop_gradient=False + [[0.5, 2.5], [2.0, 3.5]], dtype=paddle.float32, stop_gradient=False ) min_val = paddle.compat.min(x, y) @@ -135,10 +144,17 @@ def test_edge_cases(self): # uniform distributed gradient uniform_data = paddle.ones([2, 3], dtype='float64') uniform_data.stop_gradient = False + min_val = paddle.compat.min(uniform_data) + min_val.sum().backward() + # uniformly distributed (amin) + expected_grad = np.full((2, 3), 1.0 / 6.0) + np.testing.assert_allclose(uniform_data.grad.numpy(), expected_grad) + + uniform_data.clear_grad() min_val = paddle.compat.min(uniform_data, 0) min_val.values.sum().backward() - - expected_grad = np.full((2, 3), 0.5) + # take_along_axis like gradient behavior + expected_grad = np.array([[1.0, 1.0, 1.0], [0.0, 0.0, 0.0]]) np.testing.assert_allclose(uniform_data.grad.numpy(), expected_grad) # 0-dim tensor @@ -159,16 +175,26 @@ def test_edge_cases(self): ) def test_compare_with_index_ops_to_origin(self): - dtypes = ['float32', 'float64', 'bfloat16', 'float16', 'int32', 'int64'] + dtypes = ['float32', 'float64', 'int32', 'int64', 'uint8'] + cpu_reject_types = {'int16', 'bfloat16', 'float16'} for i, dtype in enumerate(dtypes): data = paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype=dtype) + # `bfloat16` and `float16` are rejected on CPU + if not data.place.is_gpu_place() and dtype in cpu_reject_types: + continue min_vals_inds = paddle.compat.min(data, dim=0) self.assertEqual(min_vals_inds.values.dtype, data.dtype) self.assertEqual(min_vals_inds.indices.dtype, paddle.int64) - origin_values = paddle.min(data, axis=0) origin_indices = paddle.argmin(data, axis=0, dtype="int64") + if dtype != 'uint8': + origin_values = paddle.min(data, axis=0) + else: + origin_values = paddle.take_along_axis( + data, origin_indices.unsqueeze(0), axis=0 + ) + origin_values.squeeze_(axis=0) if i < 4: # floating point np.testing.assert_allclose( 
min_vals_inds.values.numpy(), origin_values.numpy() @@ -195,6 +221,10 @@ def test_error_handling(self): "paddle.compat.max() received unexpected keyword argument 'axis'. " "\nDid you mean to use paddle.max() instead?" ) + err_msg4 = ( + "Non-CUDA GPU placed Tensor does not have 'paddle.float16' op registered.\n" + "Paddle support following DataTypes: int32, int64, float64, float32, uint8" + ) # empty tensor empty_tensor = paddle.to_tensor([], dtype='float32') @@ -268,6 +298,13 @@ def test_error_handling(self): paddle.compat.max(input_ts, axis=0) self.assertEqual(str(cm.exception), err_msg3) + # Rejected on CPU types + with self.assertRaises(TypeError) as cm: + tensor = paddle.to_tensor([1, 2, 3], dtype="float16") + cpu_tensor = tensor.to("cpu") + paddle.compat.max(cpu_tensor, dim=0) + self.assertEqual(str(cm.exception), err_msg4) + if __name__ == '__main__': unittest.main() From 30815564cb95b3f57acf7b4a948d723a0d6e1879 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Mon, 11 Aug 2025 02:01:36 +0000 Subject: [PATCH 11/24] [API-Compat] XPU fix (attempt) --- python/paddle/tensor/compat.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 2ff268d916a2d6..150e10dc27485a 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -599,7 +599,11 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: dim_or_other, keepdim = _min_max_param_checker("min", *args, **kwargs) if dim_or_other is None: - return _C_ops.min(input, None, False) + if input.numel() == 0: + raise ValueError( + "Reduce max cannot apply on empty tensor (numel == 0)" + ) + return paddle.amin(input) elif isinstance(dim_or_other, int): if input.place.is_gpu_place(): vals, inds = _C_ops.min_with_index( @@ -714,7 +718,11 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: dim_or_other, keepdim = _min_max_param_checker("max", *args, **kwargs) if dim_or_other is None: - return _C_ops.max(input, None, False) + if input.numel() == 0: + raise ValueError( + "Reduce max cannot apply on empty tensor (numel == 0)" + ) + return paddle.amax(input) elif isinstance(dim_or_other, int): if input.place.is_gpu_place(): vals, inds = _C_ops.max_with_index( From cd8d6aec949fa67aad4671685bd2b9ca90489e00 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Mon, 11 Aug 2025 02:35:15 +0000 Subject: [PATCH 12/24] [API-Compat] Updated ForbidKeywordsDecorator --- python/paddle/utils/decorator_utils.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/python/paddle/utils/decorator_utils.py b/python/paddle/utils/decorator_utils.py index 8f0c55e38caf5c..a9bee20a6efae8 100644 --- a/python/paddle/utils/decorator_utils.py +++ b/python/paddle/utils/decorator_utils.py @@ -247,32 +247,6 @@ def wrapper(*args: _InputT.args, **kwargs: _InputT.kwargs) -> _RetT: return decorator -# *size => shape decorator -class SizeArgsDecorator(DecoratorBase): - """ - Usage Example: - - paddle.ones(1, dtype=paddle.float32) - paddle.ones(1, 2, 3, dtype=paddle.float32) - paddle.ones([1, 2, 3], dtype=paddle.float32) - paddle.ones(size=[1, 2, 3], dtype=paddle.float32) - - paddle.ones([1, 2, 3], paddle.float32) - paddle.ones(shape=[1, 2, 3], dtype=paddle.float32) - """ - - def process( - self, args: tuple[Any, ...], kwargs: dict[str, Any] - ) -> tuple[tuple[Any, ...], dict[str, Any]]: - if 'size' in kwargs: - kwargs['shape'] = kwargs.pop('size') - elif len(args) >= 1 and isinstance(args[0], int): 
- kwargs['shape'] = list(args) - args = () - - return args, kwargs - - class VariableArgsDecorator(DecoratorBase): def __init__(self, var: str) -> None: super().__init__() From 085801eae05704451fb109c19e59b4c22c4df0d7 Mon Sep 17 00:00:00 2001 From: zhwesky2010 <1183042833@qq.com> Date: Mon, 11 Aug 2025 18:08:43 +0800 Subject: [PATCH 13/24] some create api support more usage (#74494) --- python/paddle/utils/decorator_utils.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python/paddle/utils/decorator_utils.py b/python/paddle/utils/decorator_utils.py index a9bee20a6efae8..55adce7e8961c4 100644 --- a/python/paddle/utils/decorator_utils.py +++ b/python/paddle/utils/decorator_utils.py @@ -127,6 +127,21 @@ def __init__( self.default_params = default_params warnings.simplefilter("always", category=Warning) + +# *size => shape decorator +class SizeArgsDecorator(DecoratorBase): + """ + Usage Example: + + paddle.ones(1, dtype=paddle.float32) + paddle.ones(1, 2, 3, dtype=paddle.float32) + paddle.ones([1, 2, 3], dtype=paddle.float32) + paddle.ones(size=[1, 2, 3], dtype=paddle.float32) + + paddle.ones([1, 2, 3], paddle.float32) + paddle.ones(shape=[1, 2, 3], dtype=paddle.float32) + """ + def process( self, args: tuple[Any, ...], kwargs: dict[str, Any] ) -> tuple[tuple[Any, ...], dict[str, Any]]: From 2864eb09e901d66871c0c8c6fc581484a8bf1c58 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Mon, 11 Aug 2025 12:29:41 +0000 Subject: [PATCH 14/24] [API-Compat] Static Graph and CPU end debug --- .../infer_symbolic_shape/unary_infer_sym.cc | 42 ++- .../kernels/cpu/min_max_with_index_kernel.cc | 96 ++++++ python/paddle/tensor/compat.py | 60 ++-- test/legacy_test/test_compat_minmax.py | 290 +++++++++++------- 4 files changed, 347 insertions(+), 141 deletions(-) create mode 100644 paddle/phi/kernels/cpu/min_max_with_index_kernel.cc diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc index 16ee03501fa4ab..9bf285da4d77a9 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc @@ -319,16 +319,33 @@ bool MinMaxOpInferSymbolicShape(pir::Operation *op, pir::InferSymbolicShapeContext *infer_context, bool output_val_and_ind = false) { bool flatten = GetBoolAttr(op, "flatten"); - bool keepdims = GetBoolAttr(op, "keepdims"); + bool keepdims = false; + int axis = 0; + if (output_val_and_ind) { + keepdims = GetBoolAttr(op, "keepdim"); + + PADDLE_ENFORCE_NE( + op->attributes().find("dim"), + op->attributes().end(), + common::errors::InvalidArgument( + "'dim' Attribute is expected for Min/MaxWithIndexOp. 
")); + axis = op->attributes() + .at("dim") + .dyn_cast() + .data() + .to(); + } else { + keepdims = GetBoolAttr(op, "keepdims"); + const auto &axis_shape_or_data = + infer_context->GetShapeOrDataForValue(op->operand_source(1)); + axis = static_cast( + axis_shape_or_data.data().value().at(0).Get()); + } const auto &input_sym_shape = infer_context->GetShapeOrDataForValue(op->operand_source(0)).shape(); - int rank = input_sym_shape.size(); - const auto &axis_shape_or_data = - infer_context->GetShapeOrDataForValue(op->operand_source(1)); - int axis = - static_cast(axis_shape_or_data.data().value().at(0).Get()); + int rank = input_sym_shape.size(); if (axis < 0) axis += rank; const auto &out_sym_shape = [&] { @@ -369,13 +386,20 @@ bool MinMaxOpInferSymbolicShape(pir::Operation *op, return MinMaxOpInferSymbolicShape(op, infer_context, output_val_and_ind); \ } -DEFINE_MINMAX_OP_INFER_FUNC(Argmin, false) DEFINE_MINMAX_OP_INFER_FUNC(Argmax, false) -DEFINE_MINMAX_OP_INFER_FUNC(MinWithIndex, true) DEFINE_MINMAX_OP_INFER_FUNC(MaxWithIndex, true) - #undef DEFINE_MINMAX_OP_INFER_FUNC +bool ArgminOpInferSymbolicShape(pir::Operation *op, + pir::InferSymbolicShapeContext *infer_context) { + return ArgmaxOpInferSymbolicShape(op, infer_context); +} + +bool MinWithIndexOpInferSymbolicShape( + pir::Operation *op, pir::InferSymbolicShapeContext *infer_context) { + return MaxWithIndexOpInferSymbolicShape(op, infer_context); +} + bool AsComplexOpInferSymbolicShape( pir::Operation *op, pir::InferSymbolicShapeContext *infer_context) { pir::Value operand_source = op->operand_source(0); diff --git a/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc b/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc new file mode 100644 index 00000000000000..f373553389e422 --- /dev/null +++ b/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/min_max_with_index_kernel.h" + +#include "paddle/common/ddim.h" +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/utils/data_type.h" +#include "paddle/phi/kernels/funcs/eigen/common.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +#define DEFINE_WITH_INDEX_KERNEL(OpType, name) \ + template \ + void OpType##WithIndexKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const Scalar& dim, \ + bool keepdim, \ + bool flatten, \ + DenseTensor* val_out, \ + DenseTensor* ind_out) { \ + PADDLE_ENFORCE_EQ(0, \ + 1, \ + phi::errors::Unimplemented( \ + "In static graph mode, %s PHI kernel is not " \ + "currently available on non-GPU devices.", \ + #name)); \ + } \ + template \ + void OpType##WithIndexGradKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& values, \ + const DenseTensor& indices, \ + const DenseTensor& values_grad, \ + const Scalar& dim, \ + bool keepdim, \ + DenseTensor* x_grad) { \ + PADDLE_ENFORCE_EQ(0, \ + 1, \ + phi::errors::Unimplemented( \ + "In static graph mode, %s PHI kernel is not " \ + "currently available on non-GPU devices.", \ + #name)); \ + } + +namespace phi { + +DEFINE_WITH_INDEX_KERNEL(Min, min_with_index) +DEFINE_WITH_INDEX_KERNEL(Max, max_with_index) +#undef DEFINE_WITH_INDEX_KERNEL + +} // namespace phi + +#define REGISTER_CPU_KERNELS(OpType, OpName) \ + PD_REGISTER_KERNEL(OpName, \ + CPU, \ + ALL_LAYOUT, \ + phi::OpType##WithIndexKernel, \ + phi::dtype::float16, \ + phi::dtype::bfloat16, \ + float, \ + double, \ + int32_t, \ + int64_t, \ + int16_t, \ + uint8_t) { \ + kernel->OutputAt(0).SetDataType(kernel->InputAt(0).dtype); \ + kernel->OutputAt(1).SetDataType(phi::DataType::INT64); \ + } \ + PD_REGISTER_KERNEL(OpName##_grad, \ + CPU, \ + ALL_LAYOUT, \ + phi::OpType##WithIndexGradKernel, \ + float, \ + double, \ + uint8_t, \ + int, \ + int16_t, \ + int64_t, \ + phi::dtype::float16, \ + phi::dtype::bfloat16) {} + +REGISTER_CPU_KERNELS(Min, min_with_index) +REGISTER_CPU_KERNELS(Max, max_with_index) +#undef REGISTER_CPU_KERNELS diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 150e10dc27485a..fb7e21f5c5ebfe 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -471,7 +471,7 @@ def try_get_keys(key): and type(dim_or_other) is not int ): raise invalid_arguments_exception( - f"The second input must be int or Tensor or implicit None in compat.min, but received {type(dim_or_other)}.\n" + f"The second input must be int or Tensor or implicit None in compat.{func_name}, but received {type(dim_or_other)}.\n" ) return dim_or_other, keepdim @@ -500,11 +500,10 @@ def _min_max_allow_cpu_composite(input: Tensor): or in_dtype == paddle.bfloat16 or in_dtype == paddle.int16 ): - if not input.place.is_gpu_place(): - raise TypeError( - f"Non-CUDA GPU placed Tensor does not have '{in_dtype}' op registered.\n" - "Paddle support following DataTypes: int32, int64, float64, float32, uint8" - ) + raise TypeError( + f"Non-CUDA GPU placed Tensor does not have '{in_dtype}' op registered.\n" + "Paddle support following DataTypes: int32, int64, float64, float32, uint8" + ) @ForbidKeywordsDecorator( @@ -521,8 +520,12 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: returns a named tuple MinMaxRetType(values: Tensor, indices: Tensor) 3. 
paddle.compat.min(input: Tensor, other: Tensor): see `paddle.minimum` - Note: If there are multiple minimum elements, this API evenly distributes gradient between these equal values, - following torch.min. The gradient behavior of `values` for case 2 is the same as `paddle.amin`. + Special warning: the gradient behavior is NOT well-documented by PyTorch, the actual behavior should be: + 1. Case 1: the same as `amin` + 2. Case 2: NOT evenly distributing the gradient for equal minimum elements! PyTorch actually only propagates to the elements with indices, + for example: Tensor([1, 1, 1]) -> min(..., dim=0) -> values=Tensor(0, ...), indices=Tensor(0), the gradient for input tensor won't be + Tensor([1/3, 1/3, 1/3]) as stated in their documentation, but will be Tensor([1, 0, 0]). This API implements a similar backward kernel. + 3. Case 3: the same as `minimum` Args: input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64 on GPU. @@ -532,6 +535,8 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: compute the minimum over all elements of `input` and return a Tensor with a single element, otherwise must be in the range :math:`[-input.ndim, input.ndim)`. If :math:`dim < 0`, the axis to reduce is :math:`input.ndim + dim`. + Warning: if `dim` is specified, execute static graph will throw exceptions + when not on a GPU device, since max_with_index is not implemented for non-GPU devices keepdim (bool, optional): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the `input` unless :attr:`keepdim` is true, default @@ -605,13 +610,7 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: ) return paddle.amin(input) elif isinstance(dim_or_other, int): - if input.place.is_gpu_place(): - vals, inds = _C_ops.min_with_index( - input, dim_or_other, keepdim, False - ) - inds.stop_gradient = True - return MinMaxRetType(values=vals, indices=inds) - else: + if in_dynamic_mode() and not input.place.is_gpu_place(): _min_max_allow_cpu_composite(input) # CPUPlace and other placements are implemented by composition indices = paddle.argmin(input, axis=dim_or_other, keepdim=True) @@ -622,6 +621,12 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: values=values.squeeze_(axis=dim_or_other), indices=indices.squeeze_(axis=dim_or_other), ) + else: + vals, inds = _C_ops.min_with_index( + input, dim_or_other, keepdim, False + ) + inds.stop_gradient = True + return MinMaxRetType(values=vals, indices=inds) else: return _C_ops.minimum(input, dim_or_other) @@ -640,8 +645,12 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: returns a named tuple MinMaxRetType(values: Tensor, indices: Tensor) 3. paddle.compat.max(input: Tensor, other: Tensor): see `paddle.maximum` - Note: If there are multiple maximum elements, this API evenly distributes gradient between these equal values, - following torch.max. The gradient behavior of `values` for case 2 is the same as `paddle.amax`. + Special warning: the gradient behavior is NOT well-documented by PyTorch, the actual behavior should be: + 1. Case 1: the same as `amax` + 2. Case 2: NOT evenly distributing the gradient for equal maximum elements! 
PyTorch actually only propagates to the elements with indices, + for example: Tensor([1, 1, 1]) -> max(..., dim=0) -> values=Tensor(0, ...), indices=Tensor(0), the gradient for input tensor won't be + Tensor([1/3, 1/3, 1/3]) as stated in their documentation, but will be Tensor([1, 0, 0]). This API implements a similar backward kernel. + 3. Case 3: the same as `maximum` Args: input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64 on GPU. @@ -651,6 +660,8 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: compute the maximum over all elements of `input` and return a Tensor with a single element, otherwise must be in the range :math:`[-input.ndim, input.ndim)`. If :math:`dim < 0`, the axis to reduce is :math:`input.ndim + dim`. + Warning: if `dim` is specified, execute static graph will throw exceptions + when not on a GPU device, since max_with_index is not implemented for non-GPU devices keepdim (bool, optional): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the `input` unless :attr:`keepdim` is true, default @@ -724,15 +735,8 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: ) return paddle.amax(input) elif isinstance(dim_or_other, int): - if input.place.is_gpu_place(): - vals, inds = _C_ops.max_with_index( - input, dim_or_other, keepdim, False - ) - inds.stop_gradient = True - return MinMaxRetType(values=vals, indices=inds) - else: + if in_dynamic_mode() and not input.place.is_gpu_place(): _min_max_allow_cpu_composite(input) - # CPUPlace and other placements are implemented by composition indices = paddle.argmax(input, axis=dim_or_other, keepdim=True) values = paddle.take_along_axis(input, indices, axis=dim_or_other) if keepdim: @@ -741,5 +745,11 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: values=values.squeeze_(axis=dim_or_other), indices=indices.squeeze_(axis=dim_or_other), ) + else: + vals, inds = _C_ops.max_with_index( + input, dim_or_other, keepdim, False + ) + inds.stop_gradient = True + return MinMaxRetType(values=vals, indices=inds) else: return _C_ops.maximum(input, dim_or_other) diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py index 4bf3f7ebed52f6..00245894df0480 100644 --- a/test/legacy_test/test_compat_minmax.py +++ b/test/legacy_test/test_compat_minmax.py @@ -19,26 +19,44 @@ import paddle -class TestCompatMinMax(unittest.TestCase): - def setUp(self): - """Make sure we are in a dynamic graph env""" +class TestCompatMinMaxBase(unittest.TestCase): + """The default base class is for testing min-related ops""" + + def __init__( + self, + *args, + test_op=paddle.compat.min, + origin_op=paddle.min, + index_op=paddle.argmin, + test_op_name="paddle.compat.min", + origin_op_name="paddle.min", + **kwargs, + ): + super().__init__(*args, **kwargs) paddle.disable_static() + self.test_op = test_op + self.origin_op = origin_op + self.index_op = index_op + self.test_op_name = test_op_name + self.origin_op_name = origin_op_name def test_case1_simple_reduce_all(self): data = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]], dtype='float32') - min_val = paddle.compat.min(data) - max_val = paddle.compat.max(data) + val = self.test_op(data) - self.assertAlmostEqual(min_val.item(), 1.0) - self.assertAlmostEqual(max_val.item(), 4.0) + if self.test_op_name.endswith("min"): + self.assertAlmostEqual(val.item(), 1.0) + expected_grad = np.array([[0.5, 0.5], [0.0, 
0.0]]) + else: + self.assertAlmostEqual(val.item(), 4.0) + expected_grad = np.array([[0.0, 0.0], [0.0, 1.0]]) data = paddle.to_tensor( [[1.0, 1.0], [2.0, 3.0]], dtype='float32', stop_gradient=False ) - min_val = paddle.compat.min(data) - min_val.backward() + val = self.test_op(data) + val.backward() - expected_grad = np.array([[0.5, 0.5], [0.0, 0.0]]) np.testing.assert_allclose(data.grad.numpy(), expected_grad) def test_case2_reduce_dim(self): @@ -46,34 +64,36 @@ def test_case2_reduce_dim(self): data = paddle.to_tensor( [[[5, 8], [2, 1]], [[7, 3], [9, 6]]], dtype='float32' ) + if self.test_op_name.endswith("min"): + in_dim = 1 + result = self.test_op(data, dim=in_dim) + expected_res = np.array([[[5, 3], [2, 1]]]) + self.assertEqual(result.values.shape, [2, 2]) + np.testing.assert_array_equal( + result.values.numpy(), np.array([[2, 1], [7, 3]]) + ) + np.testing.assert_array_equal( + result.indices.numpy(), np.array([[1, 1], [0, 0]]) + ) + else: + in_dim = 2 + result = self.test_op(data, dim=in_dim) + expected_res = np.array([[[7, 8], [9, 6]]]) + self.assertEqual(result.values.shape, [2, 2]) + np.testing.assert_array_equal( + result.values.numpy(), np.array([[8, 2], [7, 9]]) + ) + np.testing.assert_array_equal( + result.indices.numpy(), np.array([[1, 0], [0, 0]]) + ) - min_result = paddle.compat.min(data, dim=1) - self.assertEqual(min_result.values.shape, [2, 2]) - np.testing.assert_array_equal( - min_result.values.numpy(), np.array([[2, 1], [7, 3]]) - ) - np.testing.assert_array_equal( - min_result.indices.numpy(), np.array([[1, 1], [0, 0]]) - ) - - max_result = paddle.compat.max(data, dim=2) - self.assertEqual(max_result.values.shape, [2, 2]) - np.testing.assert_array_equal( - max_result.values.numpy(), np.array([[8, 2], [7, 9]]) - ) - np.testing.assert_array_equal( - max_result.indices.numpy(), np.array([[1, 0], [0, 0]]) - ) - - min_result_keep = paddle.compat.min(data, dim=0, keepdim=True) - self.assertEqual(min_result_keep.values.shape, [1, 2, 2]) - np.testing.assert_array_equal( - min_result_keep.values.numpy(), np.array([[[5, 3], [2, 1]]]) - ) + result_keep = self.test_op(data, dim=0, keepdim=True) + self.assertEqual(result_keep.values.shape, [1, 2, 2]) + np.testing.assert_array_equal(result_keep.values.numpy(), expected_res) - min_result_neg = paddle.compat.min(data, dim=-2) + result_neg = self.test_op(data, dim=in_dim - 3) np.testing.assert_array_equal( - min_result_neg.values.numpy(), min_result.values.numpy() + result_neg.values.numpy(), result.values.numpy() ) def test_case2_grad(self): @@ -84,43 +104,52 @@ def test_case2_grad(self): ) y = data * 2 - min_result = paddle.compat.min(y, dim=2) - min_result.values.backward() + result = self.test_op(y, dim=2) + result.values.backward() - expected_grad = np.array( - [[[2.0, 0.0], [2.0, 0.0]], [[0.0, 2.0], [0.0, 2.0]]] - ) + if self.test_op_name.endswith("min"): + expected_grad = np.array( + [[[2.0, 0.0], [2.0, 0.0]], [[0.0, 2.0], [0.0, 2.0]]] + ) + expected_grad2 = np.array( + [[[2.0, 4.0], [0.0, 0.0]], [[8.0, 2.0], [0.0, 0.0]]] + ) + else: + expected_grad = np.array( + [[[0.0, 2.0], [0.0, 2.0]], [[2.0, 0.0], [2.0, 0.0]]] + ) + expected_grad2 = np.array( + [[[2.0, 0.0], [0.0, 6.0]], [[0.0, 2.0], [10.0, 0.0]]] + ) np.testing.assert_allclose(data.grad.numpy(), expected_grad, atol=1e-6) data.clear_grad() y = data * data - min_result = paddle.compat.min(y, dim=1) - min_result[0].backward() - expected_grad = np.array( - [[[2.0, 4.0], [0.0, 0.0]], [[8.0, 2.0], [0.0, 0.0]]] - ) - np.testing.assert_allclose(data.grad.numpy(), expected_grad, 
atol=1e-6) + result = self.test_op(y, dim=1) + result[0].backward() + np.testing.assert_allclose(data.grad.numpy(), expected_grad2, atol=1e-6) def test_case3_elementwise(self): - """minimum/maximum""" x = paddle.to_tensor([[1, 5], [4, 2]], dtype='float32') y = paddle.to_tensor([[3, 2], [1, 6]], dtype='float32') - - min_result = paddle.compat.min(x, y) - np.testing.assert_array_equal( - min_result.numpy(), np.array([[1, 2], [1, 2]]) - ) - - max_result = paddle.compat.max(x, y) - np.testing.assert_array_equal( - max_result.numpy(), np.array([[3, 5], [4, 6]]) - ) - z = paddle.to_tensor([3, 4], dtype='float32') - broadcast_min = paddle.compat.min(x, z) - np.testing.assert_array_equal( - broadcast_min.numpy(), np.array([[1, 4], [3, 2]]) - ) + broadcast_res = self.test_op(x, z) + + result = self.test_op(x, y) + if self.test_op_name.endswith("min"): + np.testing.assert_array_equal( + result.numpy(), np.array([[1, 2], [1, 2]]) + ) + np.testing.assert_array_equal( + broadcast_res.numpy(), np.array([[1, 4], [3, 2]]) + ) + else: + np.testing.assert_array_equal( + result.numpy(), np.array([[3, 5], [4, 6]]) + ) + np.testing.assert_array_equal( + broadcast_res.numpy(), np.array([[3, 5], [4, 4]]) + ) def test_case3_grad(self): x = paddle.to_tensor( @@ -130,13 +159,16 @@ def test_case3_grad(self): [[0.5, 2.5], [2.0, 3.5]], dtype=paddle.float32, stop_gradient=False ) - min_val = paddle.compat.min(x, y) - min_val.backward() + val = self.test_op(x, y) + val.backward() expected_x_grad = np.array([[0.0, 1.0], [0.0, 0.0]]) - np.testing.assert_allclose(x.grad.numpy(), expected_x_grad) - expected_y_grad = np.array([[1.0, 0.0], [1.0, 1.0]]) + if self.test_op_name.endswith("max"): + expected_x_grad = 1 - expected_x_grad + expected_y_grad = 1 - expected_y_grad + + np.testing.assert_allclose(x.grad.numpy(), expected_x_grad) np.testing.assert_allclose(y.grad.numpy(), expected_y_grad) def test_edge_cases(self): @@ -144,34 +176,32 @@ def test_edge_cases(self): # uniform distributed gradient uniform_data = paddle.ones([2, 3], dtype='float64') uniform_data.stop_gradient = False - min_val = paddle.compat.min(uniform_data) - min_val.sum().backward() - # uniformly distributed (amin) + val = self.test_op(uniform_data) + val.sum().backward() + # uniformly distributed expected_grad = np.full((2, 3), 1.0 / 6.0) np.testing.assert_allclose(uniform_data.grad.numpy(), expected_grad) uniform_data.clear_grad() - min_val = paddle.compat.min(uniform_data, 0) - min_val.values.sum().backward() + val = self.test_op(uniform_data, 0) + val.values.sum().backward() # take_along_axis like gradient behavior expected_grad = np.array([[1.0, 1.0, 1.0], [0.0, 0.0, 0.0]]) np.testing.assert_allclose(uniform_data.grad.numpy(), expected_grad) # 0-dim tensor dim0_tensor = paddle.to_tensor(2, dtype='float32') - max_val = paddle.compat.max(dim0_tensor) - np.testing.assert_allclose( - max_val.numpy(), np.array(2.0, dtype=np.float32) - ) + val = self.test_op(dim0_tensor) + np.testing.assert_allclose(val.numpy(), np.array(2.0, dtype=np.float32)) # 1-dim tensor dim1_tensor = paddle.to_tensor([1], dtype='uint8') - max_val = paddle.compat.max(dim1_tensor, dim=-1, keepdim=True) + val = self.test_op(dim1_tensor, dim=-1, keepdim=True) np.testing.assert_array_equal( - max_val[0].numpy(), np.array([1], dtype=np.uint8) + val[0].numpy(), np.array([1], dtype=np.uint8) ) np.testing.assert_array_equal( - max_val[1].numpy(), np.array([0], dtype=np.int64) + val[1].numpy(), np.array([0], dtype=np.int64) ) def test_compare_with_index_ops_to_origin(self): @@ -183,13 +213,13 @@ 
def test_compare_with_index_ops_to_origin(self): # `bfloat16` and `float16` are rejected on CPU if not data.place.is_gpu_place() and dtype in cpu_reject_types: continue - min_vals_inds = paddle.compat.min(data, dim=0) - self.assertEqual(min_vals_inds.values.dtype, data.dtype) - self.assertEqual(min_vals_inds.indices.dtype, paddle.int64) + vals_inds = self.test_op(data, dim=0) + self.assertEqual(vals_inds.values.dtype, data.dtype) + self.assertEqual(vals_inds.indices.dtype, paddle.int64) - origin_indices = paddle.argmin(data, axis=0, dtype="int64") + origin_indices = self.index_op(data, axis=0, dtype="int64") if dtype != 'uint8': - origin_values = paddle.min(data, axis=0) + origin_values = self.origin_op(data, axis=0) else: origin_values = paddle.take_along_axis( data, origin_indices.unsqueeze(0), axis=0 @@ -197,14 +227,14 @@ def test_compare_with_index_ops_to_origin(self): origin_values.squeeze_(axis=0) if i < 4: # floating point np.testing.assert_allclose( - min_vals_inds.values.numpy(), origin_values.numpy() + vals_inds.values.numpy(), origin_values.numpy() ) else: np.testing.assert_array_equal( - min_vals_inds.values.numpy(), origin_values.numpy() + vals_inds.values.numpy(), origin_values.numpy() ) np.testing.assert_array_equal( - min_vals_inds[1].numpy(), origin_indices.numpy() + vals_inds[1].numpy(), origin_indices.numpy() ) def test_error_handling(self): @@ -214,12 +244,12 @@ def test_error_handling(self): "Tensors with integral type: 'paddle.int32' should stop gradient." ) err_msg2 = ( - "paddle.min() received unexpected keyword arguments 'input', 'dim'. " - "\nDid you mean to use paddle.compat.min() instead?" + f"{self.origin_op_name}() received unexpected keyword arguments 'input', 'dim'. " + f"\nDid you mean to use {self.test_op_name}() instead?" ) err_msg3 = ( - "paddle.compat.max() received unexpected keyword argument 'axis'. " - "\nDid you mean to use paddle.max() instead?" + f"{self.test_op_name}() received unexpected keyword argument 'axis'. " + f"\nDid you mean to use {self.origin_op_name}() instead?" 
) err_msg4 = ( "Non-CUDA GPU placed Tensor does not have 'paddle.float16' op registered.\n" @@ -229,82 +259,128 @@ def test_error_handling(self): # empty tensor empty_tensor = paddle.to_tensor([], dtype='float32') with self.assertRaises(ValueError): - paddle.compat.min(empty_tensor) + self.test_op(empty_tensor) # mixed parameters case 1 input_ts = paddle.to_tensor([1, 2, 3], dtype='float32') other_ts = paddle.to_tensor([1]) with self.assertRaises(TypeError): - paddle.compat.min(input_ts, other=other_ts, dim=0) + self.test_op(input_ts, other=other_ts, dim=0) # mixed parameters case 2 with self.assertRaises(TypeError): - paddle.compat.min(input_ts, 0, other=other_ts) + self.test_op(input_ts, 0, other=other_ts) # trying to perform grad ops for integral types with self.assertRaises(TypeError) as cm: tensor = paddle.ones([2, 2], dtype=paddle.int32) tensor.stop_gradient = False - tensors = paddle.compat.max(tensor, dim=0) + tensors = self.test_op(tensor, dim=0) self.assertEqual(str(cm.exception), err_msg1) # explicit None case 1 with self.assertRaises(TypeError) as cm: - paddle.compat.min(input_ts, dim=None) + self.test_op(input_ts, dim=None) # explicit None case 2 with self.assertRaises(TypeError) as cm: - paddle.compat.min(input_ts, None, keepdim=True) + self.test_op(input_ts, None, keepdim=True) # keepdim specified without specifying dim with self.assertRaises(TypeError) as cm: - paddle.compat.min(input_ts, keepdim=True) + self.test_op(input_ts, keepdim=True) # Wrong *args specification case 1 with self.assertRaises(TypeError) as cm: - paddle.compat.min(input_ts, False) + self.test_op(input_ts, False) # Wrong *args specification case 2 with self.assertRaises(TypeError) as cm: - paddle.compat.min(input_ts, other_ts, True) + self.test_op(input_ts, other_ts, True) # Tensor input for dim case 1 with self.assertRaises(TypeError) as cm: - paddle.compat.min(input_ts, dim=paddle.to_tensor([0])) + self.test_op(input_ts, dim=paddle.to_tensor([0])) # Tensor input for dim case 2 with self.assertRaises(TypeError) as cm: - paddle.compat.min(input_ts, dim=paddle.to_tensor(0)) + self.test_op(input_ts, dim=paddle.to_tensor(0)) # Duplicate Arguments case 1 with self.assertRaises(TypeError) as cm: - paddle.compat.max(input_ts, 0, dim=0) + self.test_op(input_ts, 0, dim=0) # Duplicate Arguments case 2 with self.assertRaises(TypeError) as cm: - paddle.compat.max(input_ts, other_ts, other=0) + self.test_op(input_ts, other_ts, other=0) # Duplicate Arguments case 3 with self.assertRaises(TypeError) as cm: - paddle.compat.max(input_ts, dim=0, other=0, keepdim=True) + self.test_op(input_ts, dim=0, other=0, keepdim=True) # Wrong API used case 1 with self.assertRaises(TypeError) as cm: - paddle.min(input=input_ts, dim=0) + self.origin_op(input=input_ts, dim=0) self.assertEqual(str(cm.exception), err_msg2) # Wrong API used case 2 with self.assertRaises(TypeError) as cm: - paddle.compat.max(input_ts, axis=0) + self.test_op(input_ts, axis=0) self.assertEqual(str(cm.exception), err_msg3) # Rejected on CPU types with self.assertRaises(TypeError) as cm: tensor = paddle.to_tensor([1, 2, 3], dtype="float16") cpu_tensor = tensor.to("cpu") - paddle.compat.max(cpu_tensor, dim=0) + self.test_op(cpu_tensor, dim=0) self.assertEqual(str(cm.exception), err_msg4) + def _compare_with_origin_static(self, input_shape, axis=0, keepdim=False): + if not paddle.is_compiled_with_cuda(): + return + numel = 1 + for v in input_shape: + numel *= v + paddle.enable_static() + with paddle.static.program_guard(paddle.static.Program()): + input_tensor = 
paddle.arange(numel, dtype=paddle.float32).reshape( + input_shape + ) + + y = input_tensor**2 + input_tensor + values, indices = self.test_op(y, dim=axis, keepdim=keepdim) + values += 1 + + gt_values = self.origin_op(y, axis=axis, keepdim=keepdim) + 1 + gt_indices = self.index_op(y, axis=axis, keepdim=keepdim) + + place = paddle.CUDAPlace(0) + exe = paddle.static.Executor(place) + values_np, indices_np, gt_values_np, gt_indices_np = exe.run( + fetch_list=[values, indices, gt_values, gt_indices] + ) + np.testing.assert_allclose(values_np, gt_values_np) + np.testing.assert_equal(indices_np, gt_indices_np) + paddle.disable_static() + + def test_static_graph(self): + self._compare_with_origin_static([3, 10, 2], axis=1) + self._compare_with_origin_static([3, 10, 2], axis=0, keepdim=True) + self._compare_with_origin_static([17], axis=0) + + +class TestCompatMax(TestCompatMinMaxBase): + def __init__(self, *args, **kwargs): + super().__init__( + *args, + test_op=paddle.compat.max, + origin_op=paddle.max, + index_op=paddle.argmax, + test_op_name="paddle.compat.max", + origin_op_name="paddle.max", + **kwargs, + ) + if __name__ == '__main__': unittest.main() From 693ff5289ed9f12a26e8eefcdb40c96bb76dec73 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Mon, 11 Aug 2025 13:22:16 +0000 Subject: [PATCH 15/24] [API-Compat] Resolved conflicts in decorator_utils.py --- python/paddle/utils/decorator_utils.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/python/paddle/utils/decorator_utils.py b/python/paddle/utils/decorator_utils.py index 55adce7e8961c4..5651962ea14341 100644 --- a/python/paddle/utils/decorator_utils.py +++ b/python/paddle/utils/decorator_utils.py @@ -362,3 +362,28 @@ def wrapper(*args: _InputT.args, **kwargs: _InputT.kwargs) -> _RetT: return wrapper return decorator +class ForbidKeywordsDecorator(DecoratorBase): + """A decorator that hints users to use the correct `compat` functions, when erroneous keyword arguments are detected""" + + def __init__( + self, illegal_keys: list[str], func_name: str, correct_name: str + ) -> None: + super().__init__() + self.illegal_keys = illegal_keys + self.func_name = func_name + self.correct_name = correct_name + + def process( + self, args: tuple[Any, ...], kwargs: dict[str, Any] + ) -> tuple[tuple[Any, ...], dict[str, Any]]: + found_keys = [key for key in self.illegal_keys if key in kwargs] + + if found_keys: + keys_str = ", ".join(f"'{key}'" for key in found_keys) + plural = "s" if len(found_keys) > 1 else "" + + raise TypeError( + f"{self.func_name}() received unexpected keyword argument{plural} {keys_str}. " + f"\nDid you mean to use {self.correct_name}() instead?" 
+ ) + return args, kwargs From f3d7353a77b02eb17fdde0ebac2fdbd914041f7f Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Wed, 13 Aug 2025 06:59:39 +0000 Subject: [PATCH 16/24] [API-Compat] Added static graph min/max_with_index op check, simplified implementation --- python/paddle/tensor/compat.py | 52 ++-- python/paddle/tensor/math.py | 4 +- python/paddle/utils/decorator_utils.py | 3 +- test/legacy_test/test_compat_minmax.py | 36 ++- test/legacy_test/test_minmax_with_index_op.py | 281 ++++++++++++++++++ 5 files changed, 331 insertions(+), 45 deletions(-) create mode 100644 test/legacy_test/test_minmax_with_index_op.py diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index fb7e21f5c5ebfe..7b5e035e551d40 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -424,7 +424,6 @@ def try_get_keys(key): except KeyError: raise invalid_arguments_exception() from None return res - found_key = None dim_or_other = None keepdim = False @@ -436,25 +435,19 @@ def try_get_keys(key): elif total_arg_num == 2: if num_args == 2: dim_or_other, keepdim = args - if dim_or_other is None or isinstance( - dim_or_other, (Variable, paddle.pir.Value) - ): - raise invalid_arguments_exception() elif num_args == 1: dim_or_other = args[0] - if dim_or_other is None or isinstance( - dim_or_other, (Variable, paddle.pir.Value) - ): - raise invalid_arguments_exception() keepdim = try_get_keys("keepdim") else: dim_or_other = try_get_keys("dim") keepdim = try_get_keys("keepdim") + if dim_or_other is None or isinstance( + dim_or_other, (Variable, paddle.pir.Value) + ): + raise invalid_arguments_exception() elif total_arg_num == 1: if num_args: dim_or_other = args[0] - if dim_or_other is None: - raise invalid_arguments_exception() else: if "dim" in kwargs: dim_or_other = kwargs["dim"] @@ -462,8 +455,8 @@ def try_get_keys(key): dim_or_other = kwargs["other"] if not isinstance(dim_or_other, (Variable, paddle.pir.Value)): raise invalid_arguments_exception() - if dim_or_other is None: - raise invalid_arguments_exception() + if dim_or_other is None: + raise invalid_arguments_exception() if ( dim_or_other is not None @@ -507,9 +500,9 @@ def _min_max_allow_cpu_composite(input: Tensor): @ForbidKeywordsDecorator( - illegal_keys=['x', 'axis'], + illegal_keys={"x", "axis"}, func_name="paddle.compat.min", - correct_name='paddle.min', + correct_name="paddle.min", ) def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: """ @@ -521,7 +514,7 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: 3. paddle.compat.min(input: Tensor, other: Tensor): see `paddle.minimum` Special warning: the gradient behavior is NOT well-documented by PyTorch, the actual behavior should be: - 1. Case 1: the same as `amin` + 1. Case 1: the same as `min` 2. Case 2: NOT evenly distributing the gradient for equal minimum elements! PyTorch actually only propagates to the elements with indices, for example: Tensor([1, 1, 1]) -> min(..., dim=0) -> values=Tensor(0, ...), indices=Tensor(0), the gradient for input tensor won't be Tensor([1/3, 1/3, 1/3]) as stated in their documentation, but will be Tensor([1, 0, 0]). This API implements a similar backward kernel. 
@@ -593,9 +586,7 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: [[0.20000000, 0.30000000, 0.10000000, 0.20000000], [0.10000000, 0.10000000, 0.60000000, 0.70000000]]) """ - if not isinstance(input, paddle.pir.Value) and not isinstance( - input, paddle.Tensor - ): + if not isinstance(input, (paddle.pir.Value, paddle.Tensor)): raise TypeError( f"input should be a tensor, but got an instance with type '{type(input).__name__}'" ) @@ -604,11 +595,8 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: dim_or_other, keepdim = _min_max_param_checker("min", *args, **kwargs) if dim_or_other is None: - if input.numel() == 0: - raise ValueError( - "Reduce max cannot apply on empty tensor (numel == 0)" - ) - return paddle.amin(input) + # paddle.min and paddle.amin actually shares the same grad op (ReduceAminKernel) + return paddle.min(input) elif isinstance(dim_or_other, int): if in_dynamic_mode() and not input.place.is_gpu_place(): _min_max_allow_cpu_composite(input) @@ -632,9 +620,9 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: @ForbidKeywordsDecorator( - illegal_keys=['x', 'axis'], + illegal_keys={"x", "axis"}, func_name="paddle.compat.max", - correct_name='paddle.max', + correct_name="paddle.max", ) def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: """ @@ -646,7 +634,7 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: 3. paddle.compat.max(input: Tensor, other: Tensor): see `paddle.maximum` Special warning: the gradient behavior is NOT well-documented by PyTorch, the actual behavior should be: - 1. Case 1: the same as `amax` + 1. Case 1: the same as `max` 2. Case 2: NOT evenly distributing the gradient for equal maximum elements! PyTorch actually only propagates to the elements with indices, for example: Tensor([1, 1, 1]) -> max(..., dim=0) -> values=Tensor(0, ...), indices=Tensor(0), the gradient for input tensor won't be Tensor([1/3, 1/3, 1/3]) as stated in their documentation, but will be Tensor([1, 0, 0]). This API implements a similar backward kernel. 
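
The gradient note in the docstring above is easiest to see with a tiny example. The sketch below is illustrative only (it is not part of this patch) and assumes the paddle.compat.min API introduced in this series; the expected gradient follows the docstring, i.e. only the selected index receives gradient rather than an even split:

    import paddle

    x = paddle.to_tensor([1.0, 1.0, 1.0], stop_gradient=False)
    # Case 2 reduction: returns a (values, indices) named tuple;
    # indices has stop_gradient=True, so only `values` participates in backward
    values, indices = paddle.compat.min(x, dim=0)
    values.backward()
    print(indices.item())    # 0
    # Only the element picked by `indices` receives gradient:
    # x.grad is expected to be [1., 0., 0.], not [1/3, 1/3, 1/3]
    print(x.grad.numpy())
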
@@ -718,9 +706,7 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: [[0.50000000, 0.40000000, 0.50000000, 0.90000000], [0.30000000, 0.20000000, 0.60000000, 0.70000000]]) """ - if not isinstance(input, paddle.pir.Value) and not isinstance( - input, paddle.Tensor - ): + if not isinstance(input, (paddle.pir.Value, paddle.Tensor)): raise TypeError( f"input should be a tensor, but got an instance with type '{type(input).__name__}'" ) @@ -729,11 +715,7 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: dim_or_other, keepdim = _min_max_param_checker("max", *args, **kwargs) if dim_or_other is None: - if input.numel() == 0: - raise ValueError( - "Reduce max cannot apply on empty tensor (numel == 0)" - ) - return paddle.amax(input) + return paddle.max(input) elif isinstance(dim_or_other, int): if in_dynamic_mode() and not input.place.is_gpu_place(): _min_max_allow_cpu_composite(input) diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index da5eedfe992c26..62ff59ac412546 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -3275,7 +3275,7 @@ def _check_input(x): @ForbidKeywordsDecorator( - illegal_keys=["input", "dim", "other"], + illegal_keys={"input", "dim", "other"}, func_name="paddle.max", correct_name="paddle.compat.max", ) @@ -3439,7 +3439,7 @@ def max( @ForbidKeywordsDecorator( - illegal_keys=["input", "dim", "other"], + illegal_keys={"input", "dim", "other"}, func_name="paddle.min", correct_name="paddle.compat.min", ) diff --git a/python/paddle/utils/decorator_utils.py b/python/paddle/utils/decorator_utils.py index 5651962ea14341..8c3381463f28a7 100644 --- a/python/paddle/utils/decorator_utils.py +++ b/python/paddle/utils/decorator_utils.py @@ -366,7 +366,7 @@ class ForbidKeywordsDecorator(DecoratorBase): """A decorator that hints users to use the correct `compat` functions, when erroneous keyword arguments are detected""" def __init__( - self, illegal_keys: list[str], func_name: str, correct_name: str + self, illegal_keys: set[str], func_name: str, correct_name: str ) -> None: super().__init__() self.illegal_keys = illegal_keys @@ -379,6 +379,7 @@ def process( found_keys = [key for key in self.illegal_keys if key in kwargs] if found_keys: + found_keys.sort() keys_str = ", ".join(f"'{key}'" for key in found_keys) plural = "s" if len(found_keys) > 1 else "" diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py index 00245894df0480..46d4bb8f257a0a 100644 --- a/test/legacy_test/test_compat_minmax.py +++ b/test/legacy_test/test_compat_minmax.py @@ -17,6 +17,7 @@ import numpy as np import paddle +from paddle.base import core class TestCompatMinMaxBase(unittest.TestCase): @@ -90,6 +91,8 @@ def test_case2_reduce_dim(self): result_keep = self.test_op(data, dim=0, keepdim=True) self.assertEqual(result_keep.values.shape, [1, 2, 2]) np.testing.assert_array_equal(result_keep.values.numpy(), expected_res) + result_keep = self.test_op(data, 0, keepdim=True) + np.testing.assert_array_equal(result_keep.values.numpy(), expected_res) result_neg = self.test_op(data, dim=in_dim - 3) np.testing.assert_array_equal( @@ -206,13 +209,10 @@ def test_edge_cases(self): def test_compare_with_index_ops_to_origin(self): dtypes = ['float32', 'float64', 'int32', 'int64', 'uint8'] - cpu_reject_types = {'int16', 'bfloat16', 'float16'} for i, dtype in enumerate(dtypes): data = paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype=dtype) - # `bfloat16` and `float16` are rejected on CPU - if 
not data.place.is_gpu_place() and dtype in cpu_reject_types: - continue + # `bfloat16`, `uint8` and `float16` are rejected for min/argmin vals_inds = self.test_op(data, dim=0) self.assertEqual(vals_inds.values.dtype, data.dtype) self.assertEqual(vals_inds.indices.dtype, paddle.int64) @@ -244,7 +244,7 @@ def test_error_handling(self): "Tensors with integral type: 'paddle.int32' should stop gradient." ) err_msg2 = ( - f"{self.origin_op_name}() received unexpected keyword arguments 'input', 'dim'. " + f"{self.origin_op_name}() received unexpected keyword arguments 'dim', 'input'. " f"\nDid you mean to use {self.test_op_name}() instead?" ) err_msg3 = ( @@ -255,6 +255,9 @@ def test_error_handling(self): "Non-CUDA GPU placed Tensor does not have 'paddle.float16' op registered.\n" "Paddle support following DataTypes: int32, int64, float64, float32, uint8" ) + err_msg5 = ( + "input should be a tensor, but got an instance with type 'list'" + ) # empty tensor empty_tensor = paddle.to_tensor([], dtype='float32') @@ -306,6 +309,14 @@ def test_error_handling(self): with self.assertRaises(TypeError) as cm: self.test_op(input_ts, dim=paddle.to_tensor(0)) + # Tensor input for dim case 3 + with self.assertRaises(TypeError) as cm: + self.test_op(input_ts, paddle.to_tensor([0]), keepdim=True) + + # Tensor input for dim case 4 + with self.assertRaises(TypeError) as cm: + self.test_op(input_ts, paddle.to_tensor([0]), True) + # Duplicate Arguments case 1 with self.assertRaises(TypeError) as cm: self.test_op(input_ts, 0, dim=0) @@ -335,9 +346,16 @@ def test_error_handling(self): self.test_op(cpu_tensor, dim=0) self.assertEqual(str(cm.exception), err_msg4) + # Wrong input type + with self.assertRaises(TypeError) as cm: + self.test_op([1, 2]) + self.assertEqual(str(cm.exception), err_msg5) + + # Wrong second parameter type + with self.assertRaises(TypeError): + self.test_op(input_ts, "first_dim") + def _compare_with_origin_static(self, input_shape, axis=0, keepdim=False): - if not paddle.is_compiled_with_cuda(): - return numel = 1 for v in input_shape: numel *= v @@ -363,6 +381,10 @@ def _compare_with_origin_static(self, input_shape, axis=0, keepdim=False): np.testing.assert_equal(indices_np, gt_indices_np) paddle.disable_static() + @unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", + ) def test_static_graph(self): self._compare_with_origin_static([3, 10, 2], axis=1) self._compare_with_origin_static([3, 10, 2], axis=0, keepdim=True) diff --git a/test/legacy_test/test_minmax_with_index_op.py b/test/legacy_test/test_minmax_with_index_op.py new file mode 100644 index 00000000000000..cf1ff6f6bd5dc9 --- /dev/null +++ b/test/legacy_test/test_minmax_with_index_op.py @@ -0,0 +1,281 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +from op_test import OpTest + +import paddle +from paddle.base import core + +np.random.seed(0) +paddle.enable_static() + + +def max_with_index(x, dim=None, keepdim=False): + """makeshift wrapper for the C++ op, extracted from compat.max""" + vals, inds = paddle._C_ops.max_with_index(x, dim, keepdim, False) + inds.stop_gradient = True + return vals, inds + + +def min_with_index(x, dim=None, keepdim=False): + """makeshift wrapper for the C++ op, extracted from compat.min""" + vals, inds = paddle._C_ops.min_with_index(x, dim, keepdim, False) + inds.stop_gradient = True + return vals, inds + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMaxWithIndexBasic(OpTest): + def setUp(self): + self.set_op_input_attr() + self.set_testing_op() + self.set_data_type() + self.set_input_shape() + if self.is_int: + inputs = np.random.randint(0, 255, self.input_shape).astype( + self.dtype + ) + else: + inputs = np.random.rand(*self.input_shape).astype(self.dtype) + + self.prim_op_type = "prim" + self.python_out_sig = ["values", "indices"] + self.attrs = {"dim": self.dim, "keepdim": self.keepdim} + + gt_values = self.value_op(inputs, axis=self.dim, keepdims=self.keepdim) + gt_indices = self.index_op(inputs, axis=self.dim, keepdims=self.keepdim) + self.inputs = { + 'x': inputs, + } + self.outputs = { + 'values': gt_values, + 'indices': gt_indices, + } + + def compute_grad(self): + grad = np.zeros_like(self.inputs['x'], dtype=self.dtype) + indices = ( + self.outputs['indices'] + if self.keepdim + else np.expand_dims(self.outputs['indices'], axis=self.dim) + ) + np.put_along_axis(grad, indices, 1, axis=self.dim) + return grad + + def set_testing_op(self): + self.op_type = "max_with_index" + self.python_api = max_with_index + self.public_python_api = max_with_index + self.value_op = np.max + self.index_op = np.argmax + + def set_data_type(self): + self.dtype = np.float64 + self.is_int = False + + def set_input_shape(self): + self.input_shape = [30, 257, 21] + + def set_op_input_attr(self): + self.dim = 0 + self.keepdim = False + + def test_check_output(self): + self.check_output(check_pir=True) + + def test_check_grad(self): + grad = self.compute_grad() + self.check_grad( + ['x'], + 'values', + check_pir=True, + user_defined_grads=[grad * (1.0 / grad.sum())], + ) + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMinWithIndexBasic(TestMaxWithIndexBasic): + def set_testing_op(self): + self.op_type = "min_with_index" + self.python_api = min_with_index + self.public_python_api = min_with_index + self.value_op = np.min + self.index_op = np.argmin + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMinWithIndexKeepDim(TestMinWithIndexBasic): + def set_op_input_attr(self): + self.dim = 1 + self.keepdim = True + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMaxWithIndexKeepDim(TestMaxWithIndexBasic): + def set_op_input_attr(self): + self.dim = 1 + self.keepdim = True + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMinWithIndexNegDim(TestMinWithIndexBasic): + def set_op_input_attr(self): + self.dim = -1 + self.keepdim = False + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class 
TestMaxWithIndexNegDim(TestMaxWithIndexBasic): + def set_op_input_attr(self): + self.dim = 1 + self.keepdim = False + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMinWithIndexMoreTypeAndShape(TestMinWithIndexBasic): + def set_op_input_attr(self): + self.dim = 1 + self.keepdim = True + + def set_data_type(self): + self.dtype = np.float32 + self.is_int = False + + def set_input_shape(self): + self.input_shape = [10, 20, 16] + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMinWithIndexFP16(TestMinWithIndexBasic): + def set_data_type(self): + self.dtype = np.float16 + self.is_int = False + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMaxWithIndexU8(TestMaxWithIndexBasic): + def set_data_type(self): + self.dtype = np.uint8 + self.is_int = True + + @unittest.skipIf( + True, + "integral type does not need to check grad", + ) + def test_check_grad(self): + pass + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMaxWithIndexMoreTypeAndShape(TestMaxWithIndexBasic): + def set_op_input_attr(self): + self.dim = -1 + self.keepdim = False + + def set_data_type(self): + self.dtype = np.uint8 + self.is_int = True + + def set_input_shape(self): + self.input_shape = [4095] + + @unittest.skipIf( + True, + "integral type does not need to check grad", + ) + def test_check_grad(self): + pass + + +class TestMinMaxWithIndexPlace(unittest.TestCase): + def init(self): + self.input_shape = [30, 10, 10] + self.data = np.random.randn(30, 10, 10) + + def setUp(self): + self.init() + + def cpu_place(self): + self.place = core.CPUPlace() + + def test_api_static_cpu_err_handling_1(self): + self.cpu_place() + with ( + self.assertRaises(NotImplementedError), + paddle.static.program_guard(paddle.static.Program()), + ): + input = paddle.static.data( + name="input", shape=self.input_shape, dtype="float64" + ) + output = max_with_index(input, dim=0) + exe = paddle.static.Executor(self.place) + result = exe.run( + paddle.static.default_main_program(), + feed={'input': self.data}, + fetch_list=[output], + ) + + def test_api_static_cpu_err_handling_2(self): + self.cpu_place() + with ( + self.assertRaises(NotImplementedError), + paddle.static.program_guard(paddle.static.Program()), + ): + input = paddle.static.data( + name="input", shape=self.input_shape, dtype="float32" + ) + output = min_with_index(input, dim=-2, keepdim=True) + exe = paddle.static.Executor(self.place) + result = exe.run( + paddle.static.default_main_program(), + feed={'input': self.data.astype(np.float32)}, + fetch_list=[output], + ) + + +if __name__ == "__main__": + unittest.main() From bfd5134a0b366d4eeffe80f5cbfec5703bfa8207 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Thu, 14 Aug 2025 06:55:57 +0000 Subject: [PATCH 17/24] [API-Compat] min/max static graph op test and out tensor support --- python/paddle/tensor/compat.py | 107 ++++++++++++++++---- test/legacy_test/test_compat_minmax.py | 131 +++++++++++++++++++++++-- 2 files changed, 210 insertions(+), 28 deletions(-) diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 7b5e035e551d40..bf5c52f12bc144 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -499,12 +499,44 @@ def _min_max_allow_cpu_composite(input: Tensor): ) +def _check_out_status( + out: Tensor | 
tuple[Tensor, Tensor] | list[Tensor], + expect_multiple: bool = False, +): + if out is None: + return + if not in_dynamic_mode(): + raise RuntimeError( + "Using `out` static graph CINN backend is currently not supported. Directly return the tensor tuple instead.\n" + ) + if expect_multiple: + if not isinstance(out, (tuple, list)) or len(out) != 2: + raise TypeError( + f"Expected a list or tuple of two tensors, got {type(out)} instead." + ) + if not ( + isinstance(out[0], paddle.Tensor) + and isinstance(out[1], paddle.Tensor) + ): + raise TypeError( + f"Expected Tensor type in the tuple/list, got ({type(out[0])}, {type(out[1])}) instead." + ) + else: + if not isinstance(out, paddle.Tensor): + raise TypeError(f"Expected a Tensor, got {type(out)} instead.") + + @ForbidKeywordsDecorator( illegal_keys={"x", "axis"}, func_name="paddle.compat.min", correct_name="paddle.min", ) -def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: +def min( + input: Tensor, + *args: Any, + out: Tensor | tuple[Tensor, Tensor] | list[Tensor] = None, + **kwargs: Any, +) -> Tensor | MinMaxRetType: """ Computes the minimum of tensor elements. There are mainly 3 cases (functionalities): @@ -537,6 +569,9 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: other (Tensor, optional): the other tensor to perform `paddle.minimum` with. This Tensor should have the same or broadcast-able shape as the `input`. Note that (`dim` & `keepdim`) and `other` are mutually exclusive meaning that trying to composite both will result in TypeError + out (Tensor|tuple[Tensor, Tensor], optional): the output Tensor or tuple of (Tensor, int64 Tensor) that can be optionally + given to be used as output buffers. For case 1 and 3 out is just a Tensor, while for case 2 we expect a tuple + Returns: - For case 1: a single value Tensor (0-dim) @@ -594,29 +629,43 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: dim_or_other, keepdim = _min_max_param_checker("min", *args, **kwargs) + ret = None if dim_or_other is None: # paddle.min and paddle.amin actually shares the same grad op (ReduceAminKernel) - return paddle.min(input) + _check_out_status(out, False) + ret = paddle.min(input) elif isinstance(dim_or_other, int): + _check_out_status(out, True) if in_dynamic_mode() and not input.place.is_gpu_place(): _min_max_allow_cpu_composite(input) # CPUPlace and other placements are implemented by composition indices = paddle.argmin(input, axis=dim_or_other, keepdim=True) values = paddle.take_along_axis(input, indices, axis=dim_or_other) if keepdim: - return MinMaxRetType(values=values, indices=indices) - return MinMaxRetType( - values=values.squeeze_(axis=dim_or_other), - indices=indices.squeeze_(axis=dim_or_other), - ) + ret = MinMaxRetType(values=values, indices=indices) + else: + ret = MinMaxRetType( + values=values.squeeze_(axis=dim_or_other), + indices=indices.squeeze_(axis=dim_or_other), + ) else: vals, inds = _C_ops.min_with_index( input, dim_or_other, keepdim, False ) inds.stop_gradient = True - return MinMaxRetType(values=vals, indices=inds) + ret = MinMaxRetType(values=vals, indices=inds) + else: + _check_out_status(out, False) + ret = _C_ops.minimum(input, dim_or_other) + + if out is None: + return ret else: - return _C_ops.minimum(input, dim_or_other) + if isinstance(ret, MinMaxRetType): + paddle.assign(ret.values, out[0]) + paddle.assign(ret.indices, out[1]) + else: + paddle.assign(ret, out) @ForbidKeywordsDecorator( @@ -624,7 +673,12 @@ def min(input: Tensor, *args: 
Any, **kwargs: Any) -> Tensor | MinMaxRetType: func_name="paddle.compat.max", correct_name="paddle.max", ) -def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: +def max( + input: Tensor, + *args: Any, + out: Tensor | tuple[Tensor, Tensor] | list[Tensor] = None, + **kwargs: Any, +) -> Tensor | MinMaxRetType: """ Computes the maximum of tensor elements. There are mainly 3 cases (functionalities): @@ -657,6 +711,9 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: other (Tensor, optional): the other tensor to perform `paddle.maximum` with. This Tensor should have the same or broadcast-able shape as the `input`. Note that (`dim` & `keepdim`) and `other` are mutually exclusive meaning that trying to composite both will result in TypeError + out (Tensor|tuple[Tensor, Tensor], optional): the output Tensor or tuple of (Tensor, int64 Tensor) that can be optionally + given to be used as output buffers. For case 1 and 3 out is just a Tensor, while for case 2 we expect a tuple + Returns: - For case 1: a single value Tensor (0-dim) @@ -714,24 +771,38 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: dim_or_other, keepdim = _min_max_param_checker("max", *args, **kwargs) + ret = None if dim_or_other is None: - return paddle.max(input) + _check_out_status(out, False) + ret = paddle.max(input) elif isinstance(dim_or_other, int): + _check_out_status(out, True) if in_dynamic_mode() and not input.place.is_gpu_place(): _min_max_allow_cpu_composite(input) indices = paddle.argmax(input, axis=dim_or_other, keepdim=True) values = paddle.take_along_axis(input, indices, axis=dim_or_other) if keepdim: - return MinMaxRetType(values=values, indices=indices) - return MinMaxRetType( - values=values.squeeze_(axis=dim_or_other), - indices=indices.squeeze_(axis=dim_or_other), - ) + ret = MinMaxRetType(values=values, indices=indices) + else: + ret = MinMaxRetType( + values=values.squeeze_(axis=dim_or_other), + indices=indices.squeeze_(axis=dim_or_other), + ) else: vals, inds = _C_ops.max_with_index( input, dim_or_other, keepdim, False ) inds.stop_gradient = True - return MinMaxRetType(values=vals, indices=inds) + ret = MinMaxRetType(values=vals, indices=inds) else: - return _C_ops.maximum(input, dim_or_other) + _check_out_status(out, False) + ret = _C_ops.maximum(input, dim_or_other) + + if out is None: + return ret + else: + if isinstance(ret, MinMaxRetType): + paddle.assign(ret.values, out[0]) + paddle.assign(ret.indices, out[1]) + else: + paddle.assign(ret, out) diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py index 46d4bb8f257a0a..1fb22cbff256c6 100644 --- a/test/legacy_test/test_compat_minmax.py +++ b/test/legacy_test/test_compat_minmax.py @@ -40,6 +40,7 @@ def __init__( self.index_op = index_op self.test_op_name = test_op_name self.origin_op_name = origin_op_name + np.random.seed(1) def test_case1_simple_reduce_all(self): data = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]], dtype='float32') @@ -237,6 +238,47 @@ def test_compare_with_index_ops_to_origin(self): vals_inds[1].numpy(), origin_indices.numpy() ) + def test_case1_out(self): + data = np.random.randn(4, 5, 6).astype(np.float32) + x = paddle.to_tensor(data, stop_gradient=False) + y = paddle.to_tensor(data, stop_gradient=False) + out = paddle.to_tensor(0) + self.test_op(x, out=out) + gt_out = self.origin_op(y) + gt_out.backward() + out.backward() + + np.testing.assert_allclose(out.numpy(), gt_out.numpy()) + 
np.testing.assert_allclose(x.grad.numpy(), y.grad.numpy()) + + def test_case2_out(self): + for type_to_use in (list, tuple): + data = np.random.randn(3, 17, 5).astype(np.float32) + x = paddle.to_tensor(data, stop_gradient=False) + y = paddle.to_tensor(data, stop_gradient=False) + out = type_to_use((paddle.to_tensor(0), paddle.to_tensor(0))) + self.test_op(x, dim=1, out=out) + gt_vals = self.origin_op(y, axis=1) + gt_inds = self.index_op(y, axis=1) + gt_vals.backward() + out[0].backward() + + np.testing.assert_allclose(out[0].numpy(), gt_vals.numpy()) + np.testing.assert_array_equal(out[1].numpy(), gt_inds.numpy()) + np.testing.assert_allclose(x.grad.numpy(), y.grad.numpy()) + + def test_case3_out(self): + data = np.random.randn(3, 4, 5).astype(np.float32) + x = paddle.to_tensor(data) + y = paddle.to_tensor(data) + out = paddle.to_tensor(0) + self.test_op(x, paddle.ones_like(x), out=out) + if self.test_op_name.endswith("min"): + gt_vals = paddle.minimum(x, paddle.ones_like(x)) + else: + gt_vals = paddle.maximum(x, paddle.ones_like(x)) + np.testing.assert_allclose(out.numpy(), gt_vals.numpy()) + def test_error_handling(self): """Test whether correct exception will be thrown. Skip error messages (some of them are long)""" @@ -355,7 +397,54 @@ def test_error_handling(self): with self.assertRaises(TypeError): self.test_op(input_ts, "first_dim") - def _compare_with_origin_static(self, input_shape, axis=0, keepdim=False): + paddle.enable_static() + with ( + self.assertRaises(RuntimeError) as cm, + paddle.static.program_guard(paddle.static.Program()), + ): + x = paddle.static.data(name='x', shape=[None, 6], dtype='float32') + result0, result1 = self.test_op( + paddle.zeros([3, 4]), + dim=1, + out=( + paddle.zeros([3, 4]), + paddle.zeros([3, 4], dtype=paddle.int64), + ), + ) + + place = ( + paddle.CUDAPlace(0) + if paddle.is_compiled_with_cuda() + else paddle.CPUPlace() + ) + paddle.static.Executor(place).run() + self.assertEqual( + str(cm.exception), + "Using `out` static graph CINN backend is currently not supported. Directly return the tensor tuple instead.\n", + ) + paddle.disable_static() + + def test_wrong_out_input(dim, out_input): + with self.assertRaises(TypeError) as cm: + if dim is None: + self.test_op(input_ts, out=out_input) + else: + self.test_op(input_ts, dim=dim, out=out_input) + + test_wrong_out_input(0, [0, paddle.to_tensor(0)]) + test_wrong_out_input(0, paddle.to_tensor(0)) + test_wrong_out_input(None, 0) + test_wrong_out_input(None, (paddle.to_tensor(0),)) + + def _compare_with_origin_static( + self, input_shape, axis_or_other=0, keepdim=False, use_out=False + ): + """Test Case 2 and Case 3 for return output or param output in static graph mode + + TODO(heqianyue): DO NOT set use_out for now! + Currently, static graph + CINN backend will result in unresolved dependency bug for assign op + This test is disabled for now, but will be useful when dy2st bug is fixed. 
+ """ numel = 1 for v in input_shape: numel *= v @@ -365,12 +454,34 @@ def _compare_with_origin_static(self, input_shape, axis=0, keepdim=False): input_shape ) - y = input_tensor**2 + input_tensor - values, indices = self.test_op(y, dim=axis, keepdim=keepdim) - values += 1 - - gt_values = self.origin_op(y, axis=axis, keepdim=keepdim) + 1 - gt_indices = self.index_op(y, axis=axis, keepdim=keepdim) + y = input_tensor**2 + if isinstance(axis_or_other, int): + if use_out: + out = [paddle.to_tensor(0), paddle.to_tensor([0])] + self.test_op(y, dim=axis_or_other, keepdim=keepdim, out=out) + values, indices = out + else: + values, indices = self.test_op( + y, dim=axis_or_other, keepdim=keepdim + ) + gt_values = self.origin_op( + y, axis=axis_or_other, keepdim=keepdim + ) + gt_indices = self.index_op( + y, axis=axis_or_other, keepdim=keepdim + ) + else: + if use_out: + out = paddle.to_tensor(0) + self.test_op(y, axis_or_other, out=out) + values, indices = out, paddle.to_tensor(0) + else: + values, indices = self.test_op(y, axis_or_other) + if self.test_op_name.endswith("min"): + gt_values = paddle.minimum(y, axis=axis_or_other, out=None) + else: + gt_values = paddle.maximum(y, axis=axis_or_other) + gt_indices = paddle.to_tensor(0) place = paddle.CUDAPlace(0) exe = paddle.static.Executor(place) @@ -386,9 +497,9 @@ def _compare_with_origin_static(self, input_shape, axis=0, keepdim=False): "core is not compiled with CUDA, skipping", ) def test_static_graph(self): - self._compare_with_origin_static([3, 10, 2], axis=1) - self._compare_with_origin_static([3, 10, 2], axis=0, keepdim=True) - self._compare_with_origin_static([17], axis=0) + self._compare_with_origin_static([3, 10, 2], 1) + self._compare_with_origin_static([3, 10, 2], 0, keepdim=True) + self._compare_with_origin_static([17], 0) class TestCompatMax(TestCompatMinMaxBase): From fb8bba0b37597dbe1bde9d5c6b77516482a98826 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Thu, 14 Aug 2025 07:07:42 +0000 Subject: [PATCH 18/24] [API-Compat] Resolved merge conflicts. --- python/paddle/utils/decorator_utils.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/python/paddle/utils/decorator_utils.py b/python/paddle/utils/decorator_utils.py index 8c3381463f28a7..55adce7e8961c4 100644 --- a/python/paddle/utils/decorator_utils.py +++ b/python/paddle/utils/decorator_utils.py @@ -362,29 +362,3 @@ def wrapper(*args: _InputT.args, **kwargs: _InputT.kwargs) -> _RetT: return wrapper return decorator -class ForbidKeywordsDecorator(DecoratorBase): - """A decorator that hints users to use the correct `compat` functions, when erroneous keyword arguments are detected""" - - def __init__( - self, illegal_keys: set[str], func_name: str, correct_name: str - ) -> None: - super().__init__() - self.illegal_keys = illegal_keys - self.func_name = func_name - self.correct_name = correct_name - - def process( - self, args: tuple[Any, ...], kwargs: dict[str, Any] - ) -> tuple[tuple[Any, ...], dict[str, Any]]: - found_keys = [key for key in self.illegal_keys if key in kwargs] - - if found_keys: - found_keys.sort() - keys_str = ", ".join(f"'{key}'" for key in found_keys) - plural = "s" if len(found_keys) > 1 else "" - - raise TypeError( - f"{self.func_name}() received unexpected keyword argument{plural} {keys_str}. " - f"\nDid you mean to use {self.correct_name}() instead?" 
- ) - return args, kwargs From 47a08dceaa5e0cde498d12d621460e2a1756c498 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Thu, 14 Aug 2025 14:54:23 +0000 Subject: [PATCH 19/24] [API-Compat] Fixed CPU static graph bugs removed split API for independence. --- .../kernels/cpu/min_max_with_index_kernel.cc | 4 +- paddle/phi/kernels/gpu/reduce_kernel.cu | 1 - python/paddle/tensor/compat.py | 4 +- python/paddle/tensor/manipulation.py | 7 - test/legacy_test/test_compat_minmax.py | 11 -- test/legacy_test/test_compat_split.py | 177 ----------------- test/legacy_test/test_compat_split_static.py | 184 ------------------ test/legacy_test/test_minmax_with_index_op.py | 4 +- 8 files changed, 7 insertions(+), 385 deletions(-) delete mode 100644 test/legacy_test/test_compat_split.py delete mode 100644 test/legacy_test/test_compat_split_static.py diff --git a/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc b/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc index f373553389e422..0cf9de846ba043 100644 --- a/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc +++ b/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc @@ -32,7 +32,7 @@ DenseTensor* ind_out) { \ PADDLE_ENFORCE_EQ(0, \ 1, \ - phi::errors::Unimplemented( \ + phi::errors::PreconditionNotMet( \ "In static graph mode, %s PHI kernel is not " \ "currently available on non-GPU devices.", \ #name)); \ @@ -48,7 +48,7 @@ DenseTensor* x_grad) { \ PADDLE_ENFORCE_EQ(0, \ 1, \ - phi::errors::Unimplemented( \ + phi::errors::PreconditionNotMet( \ "In static graph mode, %s PHI kernel is not " \ "currently available on non-GPU devices.", \ #name)); \ diff --git a/paddle/phi/kernels/gpu/reduce_kernel.cu b/paddle/phi/kernels/gpu/reduce_kernel.cu index 3f55297474015c..95132d09e2cc22 100644 --- a/paddle/phi/kernels/gpu/reduce_kernel.cu +++ b/paddle/phi/kernels/gpu/reduce_kernel.cu @@ -13,7 +13,6 @@ // limitations under the License. 
#include "paddle/phi/kernels/reduce_kernel.h" -#include #include "paddle/phi/kernels/gpu/reduce_amin_amax_common.h" #include "paddle/phi/kernels/reduce_amin_grad_kernel.h" diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index bf5c52f12bc144..698f082759ef01 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -25,7 +25,6 @@ ) if TYPE_CHECKING: - from collections.abc import Sequence from paddle import Tensor from paddle._typing import ( @@ -223,6 +222,7 @@ class SortRetType(NamedTuple): values: Tensor indices: Tensor + class MinMaxRetType(NamedTuple): values: Tensor indices: Tensor @@ -402,6 +402,8 @@ def to_list_if_necessary(x, size_check=False): dilations=to_list_if_necessary(self.dilations), name=self.name, ) + + def _min_max_param_checker(func_name: str, *args: Any, **kwargs: Any): def invalid_arguments_exception(error_prefix=""): type_strs = [type(v).__name__ for v in args] diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 403f48d17c2334..4a65c5695ba6ff 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -64,8 +64,6 @@ TensorOrTensors, ) -from paddle.utils.decorator_utils import ForbidKeywordsDecorator - __all__ = [] @@ -2738,11 +2736,6 @@ def row_stack(x: Sequence[Tensor], name: str | None = None) -> Tensor: return paddle.vstack(x, name=name) -@ForbidKeywordsDecorator( - illegal_keys={"tensor", "split_size_or_sections", "dim"}, - func_name="paddle.split", - correct_name="paddle.compat.split", -) def split( x: Tensor, num_or_sections: int | Sequence[int], diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py index 1fb22cbff256c6..d043b82fcea426 100644 --- a/test/legacy_test/test_compat_minmax.py +++ b/test/legacy_test/test_compat_minmax.py @@ -45,21 +45,10 @@ def __init__( def test_case1_simple_reduce_all(self): data = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]], dtype='float32') val = self.test_op(data) - if self.test_op_name.endswith("min"): self.assertAlmostEqual(val.item(), 1.0) - expected_grad = np.array([[0.5, 0.5], [0.0, 0.0]]) else: self.assertAlmostEqual(val.item(), 4.0) - expected_grad = np.array([[0.0, 0.0], [0.0, 1.0]]) - - data = paddle.to_tensor( - [[1.0, 1.0], [2.0, 3.0]], dtype='float32', stop_gradient=False - ) - val = self.test_op(data) - val.backward() - - np.testing.assert_allclose(data.grad.numpy(), expected_grad) def test_case2_reduce_dim(self): """Test dim/keepdim""" diff --git a/test/legacy_test/test_compat_split.py b/test/legacy_test/test_compat_split.py deleted file mode 100644 index a582f1b0948c4b..00000000000000 --- a/test/legacy_test/test_compat_split.py +++ /dev/null @@ -1,177 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np - -import paddle -from paddle.compat import split - - -class TestCompatSplit(unittest.TestCase): - def _compare_with_origin(self, input_tensor, size, axis=0): - pd_results = split(input_tensor, size, dim=axis) - - if isinstance(size, int): - shape_on_axis = input_tensor.shape[axis] - remaining_num = shape_on_axis % size - num_sections = shape_on_axis // size - if remaining_num == 0: - size = num_sections - else: - size = [size for _ in range(num_sections)] - size.append(remaining_num) - - origin_results = paddle.split( - input_tensor, num_or_sections=size, axis=axis - ) - - self.assertEqual(len(origin_results), len(pd_results)) - - # check shape and output section size of the output - for origin_ts, pd_ts in zip(origin_results, pd_results): - np.testing.assert_allclose(origin_ts.numpy(), pd_ts.numpy()) - - def test_basic_split(self): - """Test basic splitting with integer size""" - data = paddle.arange(12).reshape([3, 4]).astype('float32') - self._compare_with_origin(data, 1, 0) - self._compare_with_origin(data, 2, 1) - - def test_split_with_list_sections(self): - """Test splitting with list of section sizes""" - data = paddle.rand([10, 5]) - self._compare_with_origin(data, [3, 2, 5], 0) - self._compare_with_origin(data, [1, 4], -1) - - def test_chained_operations(self): - """Test split with complex operation chain""" - x = paddle.rand([8, 12]) - y = paddle.sin(x) * 2.0 + paddle.exp(x) / 3.0 - z = paddle.nn.functional.relu(y) - - z1, z2 = split(z, 7, dim=1) - - self.assertEqual(z1.shape, [8, 7]) - self.assertEqual(z2.shape, [8, 5]) - - z_np = z.numpy() - np.testing.assert_allclose(z_np[:, :7], z1.numpy()) - np.testing.assert_allclose(z_np[:, 7:], z2.numpy()) - - def test_split_grad(self): - """Test backprop for split, in1 and in2 are computed by - compat.split and original split""" - - def get_tensors(): - np.random.seed(114514) - np_arr = np.random.normal(0, 1, [2, 3, 4, 5]) - return paddle.to_tensor(np_arr), paddle.to_tensor(np_arr) - - in1, in2 = get_tensors() - in1.stop_gradient = False - in2.stop_gradient = False - - def computation_graph(in_tensor): - y = in_tensor * 2.3 + 3.0 - y = paddle.maximum(y, paddle.to_tensor([0], dtype=paddle.float32)) - return y.mean(axis=0) - - out1 = computation_graph(in1) - out2 = computation_graph(in2) - - packs1 = paddle.compat.split(out1, 2, dim=2) - packs2 = paddle.split(out2, [2, 2, 1], axis=2) - - res1 = packs1[0] + packs1[1] + packs1[2] - res2 = packs2[0] + packs2[1] + packs2[2] - res1.backward() - res2.backward() - np.testing.assert_allclose(in1.grad.numpy(), in2.grad.numpy()) - - def test_empty_dim(self): - """Split with empty dim""" - in_tensor = paddle.arange(72, dtype=paddle.int64).reshape([3, 12, 2]) - self._compare_with_origin(in_tensor, [5, 0, 7], axis=1) - - def test_split_with_one_block(self): - """Resulting tuple should be of length 1""" - in_tensor = paddle.arange(60, dtype=paddle.float32).reshape([3, 4, 5]) - self._compare_with_origin(in_tensor, 5, paddle.to_tensor([-1])) - self._compare_with_origin(in_tensor, [5], paddle.to_tensor(2)) - - def test_edge_cases(self): - """Test edge cases and error handling""" - x = paddle.arange(5) - s1, s2 = split(x, [3, 2]) - np.testing.assert_allclose(s1.numpy(), [0, 1, 2]) - np.testing.assert_allclose(s2.numpy(), [3, 4]) - - x = paddle.rand([2, 2, 2]) - a, b = split(x, 1, 2) - self.assertEqual(a.shape, [2, 2, 1]) - - # invalid split sections - with self.assertRaises(ValueError): - split(x, [3, 1], 1) - - # invalid split axis - with 
self.assertRaises(ValueError): - split(x, 2, 3) - - def test_error_hint(self): - """Test whether there will be correct exception when users pass paddle.split kwargs in paddle.compat.split, vice versa.""" - x = paddle.randn([3, 9, 5]) - - msg_gt_1 = ( - "paddle.split() received unexpected keyword arguments 'dim', 'split_size_or_sections', 'tensor'. " - "\nDid you mean to use paddle.compat.split() instead?" - ) - msg_gt_2 = ( - "paddle.compat.split() received unexpected keyword argument 'num_or_sections'. " - "\nDid you mean to use paddle.split() instead?" - ) - msg_gt_3 = "(InvalidArgument) The dim is expected to be in range of [-3, 3), but got 3" - msg_gt_4 = "paddle.compat.split expects split_sizes have only non-negative entries, but got size = -5 on dim 2" - - split_size = paddle.to_tensor([3]) - msg_gt_5 = ( - "The type of 'split_size_or_sections' in split must be int, list or tuple in imperative mode, but " - f"received {type(split_size)}." - ) - - with self.assertRaises(TypeError) as cm: - tensors = paddle.split(tensor=x, split_size_or_sections=3, dim=0) - self.assertEqual(str(cm.exception), msg_gt_1) - - with self.assertRaises(TypeError) as cm: - tensors = split(x, num_or_sections=3, dim=0) - self.assertEqual(str(cm.exception), msg_gt_2) - - with self.assertRaises(ValueError) as cm: - tensors = split(x, 3, dim=3) - self.assertEqual(str(cm.exception), msg_gt_3) - - with self.assertRaises(ValueError) as cm: - tensors = split(x, [3, 3, -5], -2) - self.assertEqual(str(cm.exception), msg_gt_4) - - with self.assertRaises(TypeError) as cm: - tensors = split(x, split_size, 1) - self.assertEqual(str(cm.exception), msg_gt_5) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_compat_split_static.py b/test/legacy_test/test_compat_split_static.py deleted file mode 100644 index 006e3ec30ea077..00000000000000 --- a/test/legacy_test/test_compat_split_static.py +++ /dev/null @@ -1,184 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np - -import paddle -from paddle.compat import split - - -class TestCompatSplitStatic(unittest.TestCase): - def _compare_with_origin_static( - self, input_shape, size, axis=0, dim_rank=-1 - ): - """size_dim: -1 means we input size by int, 0 means 0-size tensor, 1 means tensor with shape [1]""" - numel = 1 - for v in input_shape: - numel *= v - input_axis = axis - if dim_rank == 0: - input_axis = paddle.to_tensor(axis) - elif dim_rank == 1: - input_axis = paddle.to_tensor([axis]) - paddle.enable_static() - with paddle.static.program_guard(paddle.static.Program()): - input_tensor = paddle.arange(numel, dtype=paddle.float32).reshape( - input_shape - ) - pd_results = split(input_tensor, size, dim=input_axis) - - if isinstance(size, int): - shape_on_axis = input_tensor.shape[axis] - remaining_num = shape_on_axis % size - num_sections = shape_on_axis // size - if remaining_num == 0: - size = num_sections - else: - size = [size for _ in range(num_sections)] - size.append(remaining_num) - - origin_results = paddle.split( - input_tensor, num_or_sections=size, axis=axis - ) - assert len(pd_results) == len(origin_results), "length mismatched" - place = ( - paddle.CUDAPlace(0) - if paddle.is_compiled_with_cuda() - else paddle.CPUPlace() - ) - exe = paddle.static.Executor(place) - results = exe.run(fetch_list=[*origin_results, *pd_results]) - length_needed = len(results) // 2 - for i in range(length_needed): - np.testing.assert_allclose( - results[i], results[i + length_needed] - ) - paddle.disable_static() - - def test_split_composite_static(self): - paddle.seed(114514) - - def get_tensors(): - np.random.seed(114514) - np_arr = np.random.normal(0, 1, [2, 3, 4, 5]) - return paddle.to_tensor(np_arr), paddle.to_tensor(np_arr) - - in1, in2 = get_tensors() - in1.stop_gradient = False - in2.stop_gradient = False - - @paddle.jit.to_static - def computation_graph(in1: paddle.Tensor, in2: paddle.Tensor): - y1 = in1 * 1.5 + 1.0 - y1 = paddle.minimum(y1, paddle.to_tensor([0], dtype=paddle.float32)) - out1 = y1.mean(axis=0) - - y2 = in2 * 1.5 + 1.0 - y2 = paddle.minimum(y2, paddle.to_tensor([0], dtype=paddle.float32)) - out2 = y2.mean(axis=0) - - packs1 = paddle.compat.split(out1, 2, dim=2) - packs2 = paddle.split(out2, [2, 2, 1], axis=2) - - res1 = packs1[0] + packs1[1] + packs1[2] - res2 = packs2[0] + packs2[1] + packs2[2] - - return res1, res2 - - res1, res2 = computation_graph(in1, in2) - np.testing.assert_allclose(res1.numpy(), res2.numpy()) - - def test_static_graph(self): - """Test static graph execution""" - # fixed random seed for reproducibility - np.random.seed(114514) - # old static graph mode - paddle.enable_static() - - with paddle.static.program_guard(paddle.static.Program()): - x = paddle.static.data(name='x', shape=[None, 6], dtype='float32') - result0, result1 = split(x, split_size_or_sections=[3, 3], dim=1) - output = result0 * 2.0 + paddle.sin(result1) - - place = ( - paddle.CUDAPlace(0) - if paddle.is_compiled_with_cuda() - else paddle.CPUPlace() - ) - exe = paddle.static.Executor(place) - - input_data = np.random.rand(3, 6).astype('float32') - feed = {'x': input_data} - - results = exe.run(feed=feed, fetch_list=[result0, result1, output]) - - pd_result0, pd_result1 = results[0], results[1] - np.testing.assert_allclose(input_data[:, :3], pd_result0) - np.testing.assert_allclose(input_data[:, 3:], pd_result1) - - expected_output = input_data[:, :3] * 2.0 + np.sin( - input_data[:, 3:] - ) - np.testing.assert_allclose( - expected_output, results[2], 
rtol=1e-4, atol=1e-4 - ) - - paddle.disable_static() - - def test_error_hint(self): - """Test whether there will be correct exception when users pass paddle.split kwargs in paddle.compat.split, vice versa.""" - - msg_gt_1 = "split_size_or_sections must be greater than 0." - msg_gt_2 = "len(split_size_or_sections) must not be more than input.shape[dim]." - msg_gt_3 = "The type of 'split_size_or_sections' in split must be int, list or tuple in imperative mode." - msg_gt_4 = ( - "'dim' is not allowed to be a pir.Value in a static graph: " - "\npir.Value can not be used for indexing python lists/tuples." - ) - - paddle.enable_static() - with self.assertRaises(AssertionError) as cm: - x = paddle.randn([3, 4, 5]) - tensors = split(x, -2, dim=0) - self.assertEqual(str(cm.exception), msg_gt_1) - - with self.assertRaises(AssertionError) as cm: - x = paddle.randn([3, 4, 5]) - tensors = split(x, (1, 1, 1, 1, 2, 2), dim=-1) - self.assertEqual(str(cm.exception), msg_gt_2) - - with self.assertRaises(TypeError) as cm: - x = paddle.randn([3, 4, 5]) - tensors = split(x, paddle.to_tensor(2), dim=2) - self.assertEqual(str(cm.exception), msg_gt_3) - - with self.assertRaises(TypeError) as cm: - x = paddle.randn([3, 4, 5]) - tensors = split(x, 2, dim=paddle.to_tensor(2)) - paddle.disable_static() - self.assertEqual(str(cm.exception), msg_gt_4) - - def test_basic_split(self): - """Test basic splitting with integer size""" - input_shape = [3, 6] - self._compare_with_origin_static(input_shape, 1, 0) - self._compare_with_origin_static(input_shape, 3, -1) - self._compare_with_origin_static(input_shape, 4, dim_rank=0) - self._compare_with_origin_static(input_shape, 3, dim_rank=1) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_minmax_with_index_op.py b/test/legacy_test/test_minmax_with_index_op.py index cf1ff6f6bd5dc9..c95238739c4be3 100644 --- a/test/legacy_test/test_minmax_with_index_op.py +++ b/test/legacy_test/test_minmax_with_index_op.py @@ -245,7 +245,7 @@ def cpu_place(self): def test_api_static_cpu_err_handling_1(self): self.cpu_place() with ( - self.assertRaises(NotImplementedError), + self.assertRaises(RuntimeError), paddle.static.program_guard(paddle.static.Program()), ): input = paddle.static.data( @@ -262,7 +262,7 @@ def test_api_static_cpu_err_handling_1(self): def test_api_static_cpu_err_handling_2(self): self.cpu_place() with ( - self.assertRaises(NotImplementedError), + self.assertRaises(RuntimeError), paddle.static.program_guard(paddle.static.Program()), ): input = paddle.static.data( From 9300d1703acc6dccccd9353f9007aba7e01f1963 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Tue, 19 Aug 2025 08:18:54 +0000 Subject: [PATCH 20/24] [API-Compat] Resolved merged conflicts, add symbolic shape test. 
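
For reference, a minimal usage sketch of the three call forms that paddle.compat.min/max dispatch between (illustrative only, not part of the patch; the `out=` form mirrors the tests added earlier in this series):

    import paddle

    x = paddle.to_tensor([[1.0, 5.0], [3.0, 2.0]])

    # Case 1: full reduction -> 0-dim Tensor
    m = paddle.compat.max(x)                  # 5.0

    # Case 2: reduce along `dim` -> (values, indices) named tuple
    vals, inds = paddle.compat.max(x, dim=1)  # vals: [5., 3.], inds: [1, 0]

    # Case 2 with preallocated output buffers, as exercised in test_case2_out
    out = (paddle.to_tensor(0), paddle.to_tensor(0))
    paddle.compat.max(x, dim=1, out=out)

    # Case 3: elementwise maximum against another tensor (paddle.maximum)
    y = paddle.compat.max(x, paddle.ones_like(x))
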
--- python/paddle/tensor/compat.py | 76 +++++--- python/paddle/tensor/manipulation.py | 7 + python/paddle/utils/decorator_utils.py | 41 ++-- .../symbolic/test_infer_sym_shape_unary_op.py | 39 ++++ test/legacy_test/test_compat_split.py | 177 +++++++++++++++++ test/legacy_test/test_compat_split_static.py | 184 ++++++++++++++++++ test/legacy_test/test_minmax_with_index_op.py | 5 + .../test_zero_dim_sundry_dygraph_api.py | 55 ++++++ 8 files changed, 542 insertions(+), 42 deletions(-) create mode 100644 test/legacy_test/test_compat_split.py create mode 100644 test/legacy_test/test_compat_split_static.py diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 698f082759ef01..6ea2fcb5c80015 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -25,6 +25,7 @@ ) if TYPE_CHECKING: + from collections.abc import Sequence from paddle import Tensor from paddle._typing import ( @@ -638,24 +639,35 @@ def min( ret = paddle.min(input) elif isinstance(dim_or_other, int): _check_out_status(out, True) - if in_dynamic_mode() and not input.place.is_gpu_place(): - _min_max_allow_cpu_composite(input) - # CPUPlace and other placements are implemented by composition - indices = paddle.argmin(input, axis=dim_or_other, keepdim=True) - values = paddle.take_along_axis(input, indices, axis=dim_or_other) - if keepdim: - ret = MinMaxRetType(values=values, indices=indices) + if input.ndim: + if in_dynamic_mode() and not input.place.is_gpu_place(): + _min_max_allow_cpu_composite(input) + # CPUPlace and other placements are implemented by composition + + indices = paddle.argmin(input, axis=dim_or_other, keepdim=True) + values = paddle.take_along_axis( + input, indices, axis=dim_or_other + ) + if keepdim: + ret = MinMaxRetType(values=values, indices=indices) + else: + ret = MinMaxRetType( + values=values.squeeze_(axis=dim_or_other), + indices=indices.squeeze_(axis=dim_or_other), + ) else: - ret = MinMaxRetType( - values=values.squeeze_(axis=dim_or_other), - indices=indices.squeeze_(axis=dim_or_other), + vals, inds = _C_ops.min_with_index( + input, dim_or_other, keepdim, False ) + inds.stop_gradient = True + ret = MinMaxRetType(values=vals, indices=inds) else: - vals, inds = _C_ops.min_with_index( - input, dim_or_other, keepdim, False + ret = MinMaxRetType( + values=input, + indices=paddle.zeros( + [], dtype=paddle.int64, device=input.place + ), ) - inds.stop_gradient = True - ret = MinMaxRetType(values=vals, indices=inds) else: _check_out_status(out, False) ret = _C_ops.minimum(input, dim_or_other) @@ -779,23 +791,33 @@ def max( ret = paddle.max(input) elif isinstance(dim_or_other, int): _check_out_status(out, True) - if in_dynamic_mode() and not input.place.is_gpu_place(): - _min_max_allow_cpu_composite(input) - indices = paddle.argmax(input, axis=dim_or_other, keepdim=True) - values = paddle.take_along_axis(input, indices, axis=dim_or_other) - if keepdim: - ret = MinMaxRetType(values=values, indices=indices) + if input.ndim: + if in_dynamic_mode() and not input.place.is_gpu_place(): + _min_max_allow_cpu_composite(input) + indices = paddle.argmax(input, axis=dim_or_other, keepdim=True) + values = paddle.take_along_axis( + input, indices, axis=dim_or_other + ) + if keepdim: + ret = MinMaxRetType(values=values, indices=indices) + else: + ret = MinMaxRetType( + values=values.squeeze_(axis=dim_or_other), + indices=indices.squeeze_(axis=dim_or_other), + ) else: - ret = MinMaxRetType( - values=values.squeeze_(axis=dim_or_other), - indices=indices.squeeze_(axis=dim_or_other), 
+ vals, inds = _C_ops.max_with_index( + input, dim_or_other, keepdim, False ) + inds.stop_gradient = True + ret = MinMaxRetType(values=vals, indices=inds) else: - vals, inds = _C_ops.max_with_index( - input, dim_or_other, keepdim, False + ret = MinMaxRetType( + values=input, + indices=paddle.zeros( + [], dtype=paddle.int64, device=input.place + ), ) - inds.stop_gradient = True - ret = MinMaxRetType(values=vals, indices=inds) else: _check_out_status(out, False) ret = _C_ops.maximum(input, dim_or_other) diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 4a65c5695ba6ff..403f48d17c2334 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -64,6 +64,8 @@ TensorOrTensors, ) +from paddle.utils.decorator_utils import ForbidKeywordsDecorator + __all__ = [] @@ -2736,6 +2738,11 @@ def row_stack(x: Sequence[Tensor], name: str | None = None) -> Tensor: return paddle.vstack(x, name=name) +@ForbidKeywordsDecorator( + illegal_keys={"tensor", "split_size_or_sections", "dim"}, + func_name="paddle.split", + correct_name="paddle.compat.split", +) def split( x: Tensor, num_or_sections: int | Sequence[int], diff --git a/python/paddle/utils/decorator_utils.py b/python/paddle/utils/decorator_utils.py index 55adce7e8961c4..8f0c55e38caf5c 100644 --- a/python/paddle/utils/decorator_utils.py +++ b/python/paddle/utils/decorator_utils.py @@ -127,21 +127,6 @@ def __init__( self.default_params = default_params warnings.simplefilter("always", category=Warning) - -# *size => shape decorator -class SizeArgsDecorator(DecoratorBase): - """ - Usage Example: - - paddle.ones(1, dtype=paddle.float32) - paddle.ones(1, 2, 3, dtype=paddle.float32) - paddle.ones([1, 2, 3], dtype=paddle.float32) - paddle.ones(size=[1, 2, 3], dtype=paddle.float32) - - paddle.ones([1, 2, 3], paddle.float32) - paddle.ones(shape=[1, 2, 3], dtype=paddle.float32) - """ - def process( self, args: tuple[Any, ...], kwargs: dict[str, Any] ) -> tuple[tuple[Any, ...], dict[str, Any]]: @@ -262,6 +247,32 @@ def wrapper(*args: _InputT.args, **kwargs: _InputT.kwargs) -> _RetT: return decorator +# *size => shape decorator +class SizeArgsDecorator(DecoratorBase): + """ + Usage Example: + + paddle.ones(1, dtype=paddle.float32) + paddle.ones(1, 2, 3, dtype=paddle.float32) + paddle.ones([1, 2, 3], dtype=paddle.float32) + paddle.ones(size=[1, 2, 3], dtype=paddle.float32) + + paddle.ones([1, 2, 3], paddle.float32) + paddle.ones(shape=[1, 2, 3], dtype=paddle.float32) + """ + + def process( + self, args: tuple[Any, ...], kwargs: dict[str, Any] + ) -> tuple[tuple[Any, ...], dict[str, Any]]: + if 'size' in kwargs: + kwargs['shape'] = kwargs.pop('size') + elif len(args) >= 1 and isinstance(args[0], int): + kwargs['shape'] = list(args) + args = () + + return args, kwargs + + class VariableArgsDecorator(DecoratorBase): def __init__(self, var: str) -> None: super().__init__() diff --git a/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py b/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py index 841c08919c5e9f..ed77143313948b 100644 --- a/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py +++ b/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py @@ -73,6 +73,45 @@ def test_eval_symbolic(self): return True +class MaxMinWithIndexNet(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, x): + min_vals, min_inds = paddle.compat.min(x, dim=-1, keepdim=False) + max_vals, max_inds = paddle.compat.max(x, dim=-1, keepdim=True) + return min_vals + 
max_vals.squeeze(axis=-1), min_inds + max_inds + + +class MinMaxWithIndexOpInferSymbolicShapeTest(TestBase): + def prepare_data(self): + self.cases = [np.random.rand(3, 4, 5, 6), np.random.rand(257)] + self.expected = [ + [ + 'shape[S0, S1, S2], data[NULL]', + 'shape[S0, Broadcast(S0, S1), Broadcast(S1, S2), S2], data[NULL]', + ], + ['shape[], data[NULL]', 'shape[1], data[NULL]'], + ] + + def test_eval_symbolic(self): + net = MaxMinWithIndexNet() + + for i in range(len(self.cases)): + x = self.cases[i] + x_spec = InputSpec( + shape=[None for index in range(len(x.shape))], dtype='float32' + ) + input_spec = [x_spec] + net = apply_to_static(net, False, input_spec) + net.eval() + check_infer_results( + net, input_spec, 'builtin.shadow_output', self.expected[i] + ) + + return True + + class AsComplexAsRealNet(paddle.nn.Layer): def __init__(self): super().__init__() diff --git a/test/legacy_test/test_compat_split.py b/test/legacy_test/test_compat_split.py new file mode 100644 index 00000000000000..a582f1b0948c4b --- /dev/null +++ b/test/legacy_test/test_compat_split.py @@ -0,0 +1,177 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +from paddle.compat import split + + +class TestCompatSplit(unittest.TestCase): + def _compare_with_origin(self, input_tensor, size, axis=0): + pd_results = split(input_tensor, size, dim=axis) + + if isinstance(size, int): + shape_on_axis = input_tensor.shape[axis] + remaining_num = shape_on_axis % size + num_sections = shape_on_axis // size + if remaining_num == 0: + size = num_sections + else: + size = [size for _ in range(num_sections)] + size.append(remaining_num) + + origin_results = paddle.split( + input_tensor, num_or_sections=size, axis=axis + ) + + self.assertEqual(len(origin_results), len(pd_results)) + + # check shape and output section size of the output + for origin_ts, pd_ts in zip(origin_results, pd_results): + np.testing.assert_allclose(origin_ts.numpy(), pd_ts.numpy()) + + def test_basic_split(self): + """Test basic splitting with integer size""" + data = paddle.arange(12).reshape([3, 4]).astype('float32') + self._compare_with_origin(data, 1, 0) + self._compare_with_origin(data, 2, 1) + + def test_split_with_list_sections(self): + """Test splitting with list of section sizes""" + data = paddle.rand([10, 5]) + self._compare_with_origin(data, [3, 2, 5], 0) + self._compare_with_origin(data, [1, 4], -1) + + def test_chained_operations(self): + """Test split with complex operation chain""" + x = paddle.rand([8, 12]) + y = paddle.sin(x) * 2.0 + paddle.exp(x) / 3.0 + z = paddle.nn.functional.relu(y) + + z1, z2 = split(z, 7, dim=1) + + self.assertEqual(z1.shape, [8, 7]) + self.assertEqual(z2.shape, [8, 5]) + + z_np = z.numpy() + np.testing.assert_allclose(z_np[:, :7], z1.numpy()) + np.testing.assert_allclose(z_np[:, 7:], z2.numpy()) + + def test_split_grad(self): + """Test backprop for split, in1 and in2 are computed by + compat.split and 
original split""" + + def get_tensors(): + np.random.seed(114514) + np_arr = np.random.normal(0, 1, [2, 3, 4, 5]) + return paddle.to_tensor(np_arr), paddle.to_tensor(np_arr) + + in1, in2 = get_tensors() + in1.stop_gradient = False + in2.stop_gradient = False + + def computation_graph(in_tensor): + y = in_tensor * 2.3 + 3.0 + y = paddle.maximum(y, paddle.to_tensor([0], dtype=paddle.float32)) + return y.mean(axis=0) + + out1 = computation_graph(in1) + out2 = computation_graph(in2) + + packs1 = paddle.compat.split(out1, 2, dim=2) + packs2 = paddle.split(out2, [2, 2, 1], axis=2) + + res1 = packs1[0] + packs1[1] + packs1[2] + res2 = packs2[0] + packs2[1] + packs2[2] + res1.backward() + res2.backward() + np.testing.assert_allclose(in1.grad.numpy(), in2.grad.numpy()) + + def test_empty_dim(self): + """Split with empty dim""" + in_tensor = paddle.arange(72, dtype=paddle.int64).reshape([3, 12, 2]) + self._compare_with_origin(in_tensor, [5, 0, 7], axis=1) + + def test_split_with_one_block(self): + """Resulting tuple should be of length 1""" + in_tensor = paddle.arange(60, dtype=paddle.float32).reshape([3, 4, 5]) + self._compare_with_origin(in_tensor, 5, paddle.to_tensor([-1])) + self._compare_with_origin(in_tensor, [5], paddle.to_tensor(2)) + + def test_edge_cases(self): + """Test edge cases and error handling""" + x = paddle.arange(5) + s1, s2 = split(x, [3, 2]) + np.testing.assert_allclose(s1.numpy(), [0, 1, 2]) + np.testing.assert_allclose(s2.numpy(), [3, 4]) + + x = paddle.rand([2, 2, 2]) + a, b = split(x, 1, 2) + self.assertEqual(a.shape, [2, 2, 1]) + + # invalid split sections + with self.assertRaises(ValueError): + split(x, [3, 1], 1) + + # invalid split axis + with self.assertRaises(ValueError): + split(x, 2, 3) + + def test_error_hint(self): + """Test whether there will be correct exception when users pass paddle.split kwargs in paddle.compat.split, vice versa.""" + x = paddle.randn([3, 9, 5]) + + msg_gt_1 = ( + "paddle.split() received unexpected keyword arguments 'dim', 'split_size_or_sections', 'tensor'. " + "\nDid you mean to use paddle.compat.split() instead?" + ) + msg_gt_2 = ( + "paddle.compat.split() received unexpected keyword argument 'num_or_sections'. " + "\nDid you mean to use paddle.split() instead?" + ) + msg_gt_3 = "(InvalidArgument) The dim is expected to be in range of [-3, 3), but got 3" + msg_gt_4 = "paddle.compat.split expects split_sizes have only non-negative entries, but got size = -5 on dim 2" + + split_size = paddle.to_tensor([3]) + msg_gt_5 = ( + "The type of 'split_size_or_sections' in split must be int, list or tuple in imperative mode, but " + f"received {type(split_size)}." 
+ ) + + with self.assertRaises(TypeError) as cm: + tensors = paddle.split(tensor=x, split_size_or_sections=3, dim=0) + self.assertEqual(str(cm.exception), msg_gt_1) + + with self.assertRaises(TypeError) as cm: + tensors = split(x, num_or_sections=3, dim=0) + self.assertEqual(str(cm.exception), msg_gt_2) + + with self.assertRaises(ValueError) as cm: + tensors = split(x, 3, dim=3) + self.assertEqual(str(cm.exception), msg_gt_3) + + with self.assertRaises(ValueError) as cm: + tensors = split(x, [3, 3, -5], -2) + self.assertEqual(str(cm.exception), msg_gt_4) + + with self.assertRaises(TypeError) as cm: + tensors = split(x, split_size, 1) + self.assertEqual(str(cm.exception), msg_gt_5) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/legacy_test/test_compat_split_static.py b/test/legacy_test/test_compat_split_static.py new file mode 100644 index 00000000000000..006e3ec30ea077 --- /dev/null +++ b/test/legacy_test/test_compat_split_static.py @@ -0,0 +1,184 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +from paddle.compat import split + + +class TestCompatSplitStatic(unittest.TestCase): + def _compare_with_origin_static( + self, input_shape, size, axis=0, dim_rank=-1 + ): + """size_dim: -1 means we input size by int, 0 means 0-size tensor, 1 means tensor with shape [1]""" + numel = 1 + for v in input_shape: + numel *= v + input_axis = axis + if dim_rank == 0: + input_axis = paddle.to_tensor(axis) + elif dim_rank == 1: + input_axis = paddle.to_tensor([axis]) + paddle.enable_static() + with paddle.static.program_guard(paddle.static.Program()): + input_tensor = paddle.arange(numel, dtype=paddle.float32).reshape( + input_shape + ) + pd_results = split(input_tensor, size, dim=input_axis) + + if isinstance(size, int): + shape_on_axis = input_tensor.shape[axis] + remaining_num = shape_on_axis % size + num_sections = shape_on_axis // size + if remaining_num == 0: + size = num_sections + else: + size = [size for _ in range(num_sections)] + size.append(remaining_num) + + origin_results = paddle.split( + input_tensor, num_or_sections=size, axis=axis + ) + assert len(pd_results) == len(origin_results), "length mismatched" + place = ( + paddle.CUDAPlace(0) + if paddle.is_compiled_with_cuda() + else paddle.CPUPlace() + ) + exe = paddle.static.Executor(place) + results = exe.run(fetch_list=[*origin_results, *pd_results]) + length_needed = len(results) // 2 + for i in range(length_needed): + np.testing.assert_allclose( + results[i], results[i + length_needed] + ) + paddle.disable_static() + + def test_split_composite_static(self): + paddle.seed(114514) + + def get_tensors(): + np.random.seed(114514) + np_arr = np.random.normal(0, 1, [2, 3, 4, 5]) + return paddle.to_tensor(np_arr), paddle.to_tensor(np_arr) + + in1, in2 = get_tensors() + in1.stop_gradient = False + in2.stop_gradient = False + + @paddle.jit.to_static + def computation_graph(in1: paddle.Tensor, in2: 
paddle.Tensor): + y1 = in1 * 1.5 + 1.0 + y1 = paddle.minimum(y1, paddle.to_tensor([0], dtype=paddle.float32)) + out1 = y1.mean(axis=0) + + y2 = in2 * 1.5 + 1.0 + y2 = paddle.minimum(y2, paddle.to_tensor([0], dtype=paddle.float32)) + out2 = y2.mean(axis=0) + + packs1 = paddle.compat.split(out1, 2, dim=2) + packs2 = paddle.split(out2, [2, 2, 1], axis=2) + + res1 = packs1[0] + packs1[1] + packs1[2] + res2 = packs2[0] + packs2[1] + packs2[2] + + return res1, res2 + + res1, res2 = computation_graph(in1, in2) + np.testing.assert_allclose(res1.numpy(), res2.numpy()) + + def test_static_graph(self): + """Test static graph execution""" + # fixed random seed for reproducibility + np.random.seed(114514) + # old static graph mode + paddle.enable_static() + + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.static.data(name='x', shape=[None, 6], dtype='float32') + result0, result1 = split(x, split_size_or_sections=[3, 3], dim=1) + output = result0 * 2.0 + paddle.sin(result1) + + place = ( + paddle.CUDAPlace(0) + if paddle.is_compiled_with_cuda() + else paddle.CPUPlace() + ) + exe = paddle.static.Executor(place) + + input_data = np.random.rand(3, 6).astype('float32') + feed = {'x': input_data} + + results = exe.run(feed=feed, fetch_list=[result0, result1, output]) + + pd_result0, pd_result1 = results[0], results[1] + np.testing.assert_allclose(input_data[:, :3], pd_result0) + np.testing.assert_allclose(input_data[:, 3:], pd_result1) + + expected_output = input_data[:, :3] * 2.0 + np.sin( + input_data[:, 3:] + ) + np.testing.assert_allclose( + expected_output, results[2], rtol=1e-4, atol=1e-4 + ) + + paddle.disable_static() + + def test_error_hint(self): + """Test whether there will be correct exception when users pass paddle.split kwargs in paddle.compat.split, vice versa.""" + + msg_gt_1 = "split_size_or_sections must be greater than 0." + msg_gt_2 = "len(split_size_or_sections) must not be more than input.shape[dim]." + msg_gt_3 = "The type of 'split_size_or_sections' in split must be int, list or tuple in imperative mode." + msg_gt_4 = ( + "'dim' is not allowed to be a pir.Value in a static graph: " + "\npir.Value can not be used for indexing python lists/tuples." 
+ ) + + paddle.enable_static() + with self.assertRaises(AssertionError) as cm: + x = paddle.randn([3, 4, 5]) + tensors = split(x, -2, dim=0) + self.assertEqual(str(cm.exception), msg_gt_1) + + with self.assertRaises(AssertionError) as cm: + x = paddle.randn([3, 4, 5]) + tensors = split(x, (1, 1, 1, 1, 2, 2), dim=-1) + self.assertEqual(str(cm.exception), msg_gt_2) + + with self.assertRaises(TypeError) as cm: + x = paddle.randn([3, 4, 5]) + tensors = split(x, paddle.to_tensor(2), dim=2) + self.assertEqual(str(cm.exception), msg_gt_3) + + with self.assertRaises(TypeError) as cm: + x = paddle.randn([3, 4, 5]) + tensors = split(x, 2, dim=paddle.to_tensor(2)) + paddle.disable_static() + self.assertEqual(str(cm.exception), msg_gt_4) + + def test_basic_split(self): + """Test basic splitting with integer size""" + input_shape = [3, 6] + self._compare_with_origin_static(input_shape, 1, 0) + self._compare_with_origin_static(input_shape, 3, -1) + self._compare_with_origin_static(input_shape, 4, dim_rank=0) + self._compare_with_origin_static(input_shape, 3, dim_rank=1) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/legacy_test/test_minmax_with_index_op.py b/test/legacy_test/test_minmax_with_index_op.py index c95238739c4be3..c38509c9c44285 100644 --- a/test/legacy_test/test_minmax_with_index_op.py +++ b/test/legacy_test/test_minmax_with_index_op.py @@ -232,6 +232,11 @@ def test_check_grad(self): class TestMinMaxWithIndexPlace(unittest.TestCase): + """min/max_with_index has no CPU version, so when CUDA is not available, + we skip all the above test. A runtime error will be emitted if min/max_with_index + is called on CPU, this unit test tries capturing it. + """ + def init(self): self.input_shape = [30, 10, 10] self.data = np.random.randn(30, 10, 10) diff --git a/test/legacy_test/test_zero_dim_sundry_dygraph_api.py b/test/legacy_test/test_zero_dim_sundry_dygraph_api.py index bc958ca42bf242..b8debdcab006bb 100644 --- a/test/legacy_test/test_zero_dim_sundry_dygraph_api.py +++ b/test/legacy_test/test_zero_dim_sundry_dygraph_api.py @@ -551,6 +551,61 @@ def test_argmax(self): out = paddle.argmax(x, keepdim=True) self.assertEqual(out.shape, [1, 1]) + def _make_compat_minmax_test(self, func_name): + # 1) x is 0D + x = paddle.rand([]) + val1, ind1 = func_name(x, 0) + val2, ind2 = func_name(x, -1) + val3 = func_name(x) + + self.assertEqual(val1.shape, []) + self.assertEqual(ind1.shape, []) + np.testing.assert_allclose(val1, x) + np.testing.assert_allclose(ind1, 0) + + self.assertEqual(val2.shape, []) + self.assertEqual(ind2.shape, []) + np.testing.assert_allclose(val2, x) + np.testing.assert_allclose(ind2, 0) + + self.assertEqual(val3.shape, []) + np.testing.assert_allclose(val3, x) + + # 2) x is 1D + x = paddle.rand([5]) + val, ind = func_name(x, 0) + self.assertEqual(val.shape, []) + self.assertEqual(ind.shape, []) + + # 3) x is ND + x = paddle.rand([3, 5]) + val, ind = func_name(x, dim=1) + self.assertEqual(val.shape, [3]) + self.assertEqual(ind.shape, [3]) + + val = func_name(x) + self.assertEqual(val.shape, []) + + # 4) x is ND, keepdim=True + x = paddle.rand([3, 5]) + val, ind = func_name(x, dim=0, keepdim=True) + self.assertEqual(val.shape, [1, 5]) + self.assertEqual(ind.shape, [1, 5]) + + # 5) test backward + x = paddle.randn([4, 5]) + x.stop_gradient = False + + val, ind = func_name(x, dim=0) + val.backward() + self.assertEqual(x.grad.shape, [4, 5]) + + def test_compat_min(self): + self._make_compat_minmax_test(paddle.compat.min) + + def test_compat_max(self): + 
self._make_compat_minmax_test(paddle.compat.max) + def test_kthvalue(self): # 1) x is 0D x = paddle.randn([]) From 17d848c3a80c72bf66c4638be37e91c3461ccd36 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Tue, 19 Aug 2025 12:36:04 +0000 Subject: [PATCH 21/24] [API-Compat] Updated unittests --- .../kernels/cpu/min_max_with_index_kernel.cc | 96 ------------------- test/legacy_test/test_minmax_with_index_op.py | 51 ---------- 2 files changed, 147 deletions(-) delete mode 100644 paddle/phi/kernels/cpu/min_max_with_index_kernel.cc diff --git a/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc b/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc deleted file mode 100644 index 0cf9de846ba043..00000000000000 --- a/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/kernels/min_max_with_index_kernel.h" - -#include "paddle/common/ddim.h" -#include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" -#include "paddle/phi/kernels/funcs/eigen/common.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -#define DEFINE_WITH_INDEX_KERNEL(OpType, name) \ - template \ - void OpType##WithIndexKernel(const Context& dev_ctx, \ - const DenseTensor& x, \ - const Scalar& dim, \ - bool keepdim, \ - bool flatten, \ - DenseTensor* val_out, \ - DenseTensor* ind_out) { \ - PADDLE_ENFORCE_EQ(0, \ - 1, \ - phi::errors::PreconditionNotMet( \ - "In static graph mode, %s PHI kernel is not " \ - "currently available on non-GPU devices.", \ - #name)); \ - } \ - template \ - void OpType##WithIndexGradKernel(const Context& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& values, \ - const DenseTensor& indices, \ - const DenseTensor& values_grad, \ - const Scalar& dim, \ - bool keepdim, \ - DenseTensor* x_grad) { \ - PADDLE_ENFORCE_EQ(0, \ - 1, \ - phi::errors::PreconditionNotMet( \ - "In static graph mode, %s PHI kernel is not " \ - "currently available on non-GPU devices.", \ - #name)); \ - } - -namespace phi { - -DEFINE_WITH_INDEX_KERNEL(Min, min_with_index) -DEFINE_WITH_INDEX_KERNEL(Max, max_with_index) -#undef DEFINE_WITH_INDEX_KERNEL - -} // namespace phi - -#define REGISTER_CPU_KERNELS(OpType, OpName) \ - PD_REGISTER_KERNEL(OpName, \ - CPU, \ - ALL_LAYOUT, \ - phi::OpType##WithIndexKernel, \ - phi::dtype::float16, \ - phi::dtype::bfloat16, \ - float, \ - double, \ - int32_t, \ - int64_t, \ - int16_t, \ - uint8_t) { \ - kernel->OutputAt(0).SetDataType(kernel->InputAt(0).dtype); \ - kernel->OutputAt(1).SetDataType(phi::DataType::INT64); \ - } \ - PD_REGISTER_KERNEL(OpName##_grad, \ - CPU, \ - ALL_LAYOUT, \ - phi::OpType##WithIndexGradKernel, \ - float, \ - double, \ - uint8_t, \ - int, \ - int16_t, \ - int64_t, \ - phi::dtype::float16, \ - phi::dtype::bfloat16) {} - -REGISTER_CPU_KERNELS(Min, min_with_index) -REGISTER_CPU_KERNELS(Max, max_with_index) 
-#undef REGISTER_CPU_KERNELS diff --git a/test/legacy_test/test_minmax_with_index_op.py b/test/legacy_test/test_minmax_with_index_op.py index c38509c9c44285..6bd686cc71270e 100644 --- a/test/legacy_test/test_minmax_with_index_op.py +++ b/test/legacy_test/test_minmax_with_index_op.py @@ -231,56 +231,5 @@ def test_check_grad(self): pass -class TestMinMaxWithIndexPlace(unittest.TestCase): - """min/max_with_index has no CPU version, so when CUDA is not available, - we skip all the above test. A runtime error will be emitted if min/max_with_index - is called on CPU, this unit test tries capturing it. - """ - - def init(self): - self.input_shape = [30, 10, 10] - self.data = np.random.randn(30, 10, 10) - - def setUp(self): - self.init() - - def cpu_place(self): - self.place = core.CPUPlace() - - def test_api_static_cpu_err_handling_1(self): - self.cpu_place() - with ( - self.assertRaises(RuntimeError), - paddle.static.program_guard(paddle.static.Program()), - ): - input = paddle.static.data( - name="input", shape=self.input_shape, dtype="float64" - ) - output = max_with_index(input, dim=0) - exe = paddle.static.Executor(self.place) - result = exe.run( - paddle.static.default_main_program(), - feed={'input': self.data}, - fetch_list=[output], - ) - - def test_api_static_cpu_err_handling_2(self): - self.cpu_place() - with ( - self.assertRaises(RuntimeError), - paddle.static.program_guard(paddle.static.Program()), - ): - input = paddle.static.data( - name="input", shape=self.input_shape, dtype="float32" - ) - output = min_with_index(input, dim=-2, keepdim=True) - exe = paddle.static.Executor(self.place) - result = exe.run( - paddle.static.default_main_program(), - feed={'input': self.data.astype(np.float32)}, - fetch_list=[output], - ) - - if __name__ == "__main__": unittest.main() From 822e8d75fab6a948854c6f726c9c8571b89e4c6d Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Wed, 20 Aug 2025 02:11:17 +0000 Subject: [PATCH 22/24] [API-Compat] Update version year --- paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/min_max_with_index_kernel.cu | 2 +- paddle/phi/kernels/min_max_with_index_kernel.h | 2 +- test/legacy_test/test_minmax_with_index_op.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu b/paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu index f34d03bf07e506..2cbffdb67cb3ae 100644 --- a/paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu b/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu index 2509c34fb0c8fd..521444ef9e9481 100644 --- a/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu +++ b/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/paddle/phi/kernels/min_max_with_index_kernel.h b/paddle/phi/kernels/min_max_with_index_kernel.h index eca50fc3a752e8..56e733fcdbeef8 100644 --- a/paddle/phi/kernels/min_max_with_index_kernel.h +++ b/paddle/phi/kernels/min_max_with_index_kernel.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/legacy_test/test_minmax_with_index_op.py b/test/legacy_test/test_minmax_with_index_op.py index 6bd686cc71270e..d80d89ae3e3c09 100644 --- a/test/legacy_test/test_minmax_with_index_op.py +++ b/test/legacy_test/test_minmax_with_index_op.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 17f080e960b07b38e65da4cf2a733093750527b3 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Wed, 20 Aug 2025 06:41:15 +0000 Subject: [PATCH 23/24] [API-Compat] Fixed min/max out mechanism --- python/paddle/tensor/compat.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 6ea2fcb5c80015..3995a274309144 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -672,14 +672,13 @@ def min( _check_out_status(out, False) ret = _C_ops.minimum(input, dim_or_other) - if out is None: - return ret - else: + if out is not None: if isinstance(ret, MinMaxRetType): paddle.assign(ret.values, out[0]) paddle.assign(ret.indices, out[1]) else: paddle.assign(ret, out) + return ret @ForbidKeywordsDecorator( @@ -822,11 +821,10 @@ def max( _check_out_status(out, False) ret = _C_ops.maximum(input, dim_or_other) - if out is None: - return ret - else: + if out is not None: if isinstance(ret, MinMaxRetType): paddle.assign(ret.values, out[0]) paddle.assign(ret.indices, out[1]) else: paddle.assign(ret, out) + return ret From 0fbbb99c61948a06d2455372a11ff2b77f6206c5 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Fri, 22 Aug 2025 17:13:02 +0000 Subject: [PATCH 24/24] [API-Compat] Try adding even more unittests. 
--- .../infer_symbolic_shape/unary_infer_sym.cc | 2 +- paddle/phi/infermeta/unary.cc | 148 ++++++++---------- paddle/phi/ops/yaml/ops.yaml | 4 +- .../symbolic/test_infer_sym_shape_unary_op.py | 39 ----- .../cinn/symbolic/test_minmax_infer_sym.py | 119 ++++++++++++++ test/legacy_test/test_compat_minmax.py | 56 +++++++ .../test_zero_dim_sundry_dygraph_api.py | 37 +++++ 7 files changed, 279 insertions(+), 126 deletions(-) create mode 100644 test/ir/pir/cinn/symbolic/test_minmax_infer_sym.py diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc index 9bf285da4d77a9..ab9e020aea41ea 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc @@ -352,7 +352,7 @@ bool MinMaxOpInferSymbolicShape(pir::Operation *op, std::vector out_sym_shape; if (flatten) { if (keepdims) { - out_sym_shape.emplace_back(std::int64_t(rank)); + out_sym_shape.resize(rank, std::int64_t(1)); } else { out_sym_shape = {}; } diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index fe014446c88ce2..ab8dff4a9e8d2d 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -366,90 +366,6 @@ void ArgMinMaxInferMeta(const MetaTensor& x, } } -void MinMaxWithIndexInferMeta(const MetaTensor& x, - const Scalar& axis, - bool keepdims, - bool flatten, - MetaTensor* val_out, - MetaTensor* ind_out, - MetaConfig config) { - DataType val_dtype = x.dtype(); - - if (!config.is_runtime && axis.FromTensor()) { - std::vector vec; - if (flatten) { - if (keepdims) { // NOLINT - vec = std::vector(x.dims().size(), -1); - } else { - vec = {}; - } - } else { - if (keepdims) { - vec = std::vector(x.dims().size(), -1); - } else { - vec = std::vector(x.dims().size() - 1, -1); - } - } - val_out->set_dims(common::make_ddim(vec)); - val_out->set_dtype(val_dtype); - ind_out->set_dims(common::make_ddim(vec)); - ind_out->set_dtype(DataType::INT64); - return; - } - auto int_axis = axis.to(); - const auto& x_dims = x.dims(); - - auto x_rank = x.dims().size(); - if (x_rank > 0) { - PADDLE_ENFORCE_GE(int_axis, - -x_rank, - common::errors::InvalidArgument( - "'axis'(%d) must be greater than or equal to" - " -Rank(X)(%d).", - int_axis, - -x_rank)); - PADDLE_ENFORCE_LT( - int_axis, - x_rank, - common::errors::InvalidArgument( - "'axis'(%d) must be less than Rank(X)(%d) of Input(X).", - int_axis, - x_rank)); - } else { - // 0-dim tensor - PADDLE_ENFORCE_EQ(int_axis == 0 || int_axis == -1, - true, - common::errors::InvalidArgument( - "'axis'(%d) must be 0 or -1 if input tensor is " - "0-dim.", - int_axis)); - } - - if (int_axis < 0) int_axis += x_rank; - - std::vector vec; - if (flatten) { - if (keepdims) { // NOLINT - vec = std::vector(x.dims().size(), 1); - } else { - vec = {}; - } - } else { - for (int64_t i = 0; i < int_axis; i++) - vec.emplace_back(x_dims[static_cast(i)]); - if (keepdims) { - vec.emplace_back(static_cast(1)); - } - for (int64_t i = int_axis + 1; i < x_rank; i++) - vec.emplace_back(x_dims[static_cast(i)]); - } - - val_out->set_dims(common::make_ddim(vec)); - val_out->set_dtype(val_dtype); - ind_out->set_dims(common::make_ddim(vec)); - ind_out->set_dtype(DataType::INT64); -} - void ArgsortInferMeta(const MetaTensor& input, int axis, bool descending, @@ -3034,6 +2950,70 @@ void ModeInferMeta(const MetaTensor& x, indices->set_dtype(DataType::INT64); } 
+void MinMaxWithIndexInferMeta(const MetaTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + MetaTensor* val_out, + MetaTensor* ind_out, + MetaConfig config) { + DataType val_dtype = x.dtype(); + + // axis.FromTensor will never be true for this op + auto int_axis = axis.to(); + const auto& x_dims = x.dims(); + + auto x_rank = x.dims().size(); + if (x_rank > 0) { + PADDLE_ENFORCE_GE(int_axis, + -x_rank, + common::errors::InvalidArgument( + "'axis'(%d) must be greater than or equal to" + " -Rank(X)(%d).", + int_axis, + -x_rank)); + PADDLE_ENFORCE_LT( + int_axis, + x_rank, + common::errors::InvalidArgument( + "'axis'(%d) must be less than Rank(X)(%d) of Input(X).", + int_axis, + x_rank)); + } else { + // 0-dim tensor + PADDLE_ENFORCE_EQ(int_axis == 0 || int_axis == -1, + true, + common::errors::InvalidArgument( + "'axis'(%d) must be 0 or -1 if input tensor is " + "0-dim.", + int_axis)); + } + + if (int_axis < 0) int_axis += x_rank; + + std::vector vec; + if (flatten) { + if (keepdims) { // NOLINT + vec = std::vector(x.dims().size(), 1); + } else { + vec = {}; + } + } else { + for (int64_t i = 0; i < int_axis; i++) + vec.emplace_back(x_dims[static_cast(i)]); + if (keepdims) { + vec.emplace_back(static_cast(1)); + } + for (int64_t i = int_axis + 1; i < x_rank; i++) + vec.emplace_back(x_dims[static_cast(i)]); + } + + val_out->set_dims(common::make_ddim(vec)); + val_out->set_dtype(val_dtype); + ind_out->set_dims(common::make_ddim(vec)); + ind_out->set_dtype(DataType::INT64); +} + void MultinomialInferMeta(const MetaTensor& x, const Scalar& num_samples, bool replacement, diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml index 78dcca6d579589..694b19cbe62188 100644 --- a/paddle/phi/ops/yaml/ops.yaml +++ b/paddle/phi/ops/yaml/ops.yaml @@ -3586,7 +3586,7 @@ func : max_with_index data_type : x backward : max_with_index_grad - interfaces : paddle::dialect::InferSymbolicShapeInterface, paddle::dialect::LayoutTransformationInterface + interfaces : paddle::dialect::InferSymbolicShapeInterface - op : maxout args : (Tensor x, int groups, int axis = 1) @@ -3706,7 +3706,7 @@ func : min_with_index data_type : x backward : min_with_index_grad - interfaces : paddle::dialect::InferSymbolicShapeInterface, paddle::dialect::LayoutTransformationInterface + interfaces : paddle::dialect::InferSymbolicShapeInterface - op : mish args : (Tensor x, float lambda) diff --git a/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py b/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py index ed77143313948b..841c08919c5e9f 100644 --- a/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py +++ b/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py @@ -73,45 +73,6 @@ def test_eval_symbolic(self): return True -class MaxMinWithIndexNet(paddle.nn.Layer): - def __init__(self): - super().__init__() - - def forward(self, x): - min_vals, min_inds = paddle.compat.min(x, dim=-1, keepdim=False) - max_vals, max_inds = paddle.compat.max(x, dim=-1, keepdim=True) - return min_vals + max_vals.squeeze(axis=-1), min_inds + max_inds - - -class MinMaxWithIndexOpInferSymbolicShapeTest(TestBase): - def prepare_data(self): - self.cases = [np.random.rand(3, 4, 5, 6), np.random.rand(257)] - self.expected = [ - [ - 'shape[S0, S1, S2], data[NULL]', - 'shape[S0, Broadcast(S0, S1), Broadcast(S1, S2), S2], data[NULL]', - ], - ['shape[], data[NULL]', 'shape[1], data[NULL]'], - ] - - def test_eval_symbolic(self): - net = MaxMinWithIndexNet() - - for i in range(len(self.cases)): - x = self.cases[i] - 
x_spec = InputSpec( - shape=[None for index in range(len(x.shape))], dtype='float32' - ) - input_spec = [x_spec] - net = apply_to_static(net, False, input_spec) - net.eval() - check_infer_results( - net, input_spec, 'builtin.shadow_output', self.expected[i] - ) - - return True - - class AsComplexAsRealNet(paddle.nn.Layer): def __init__(self): super().__init__() diff --git a/test/ir/pir/cinn/symbolic/test_minmax_infer_sym.py b/test/ir/pir/cinn/symbolic/test_minmax_infer_sym.py new file mode 100644 index 00000000000000..81975c8029bb33 --- /dev/null +++ b/test/ir/pir/cinn/symbolic/test_minmax_infer_sym.py @@ -0,0 +1,119 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import unittest +from os.path import dirname + +import numpy as np +from test_infer_sym_shape_utils import ( + TestBase, + check_infer_results, +) + +import paddle +from paddle.static import InputSpec + +sys.path.append(dirname(dirname(__file__))) +from utils import apply_to_static + +# NOTE(SigureMo): Disable the CSE optimization to avoid op number change. +paddle.set_flags({"FLAGS_enable_cse_in_dy2st": False}) + + +class MaxMinWithIndexNet(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, x): + min_vals, min_inds = paddle.compat.min(x, dim=-1, keepdim=False) + max_vals, max_inds = paddle.compat.max(x, dim=-1, keepdim=True) + return min_vals + max_vals.squeeze(axis=-1), min_inds + max_inds + + +class MinMaxWithIndexOpInferSymbolicShapeTest(TestBase): + def prepare_data(self): + self.cases = [np.random.rand(3, 4, 5, 6), np.random.rand(257)] + self.expected = [ + [ + 'shape[S0, S1, S2], data[NULL]', + 'shape[S0, Broadcast(S0, S1), Broadcast(S1, S2), S2], data[NULL]', + ], + ['shape[], data[NULL]', 'shape[1], data[NULL]'], + ] + + def test_eval_symbolic(self): + net = MaxMinWithIndexNet() + + for i in range(len(self.cases)): + x = self.cases[i] + x_spec = InputSpec( + shape=[None for index in range(len(x.shape))], dtype='float32' + ) + input_spec = [x_spec] + net = apply_to_static(net, False, input_spec) + net.eval() + check_infer_results( + net, input_spec, 'builtin.shadow_output', self.expected[i] + ) + + return True + + +class MinMaxWithIndexRawNet(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, x): + x = x * 2 + 1 + min_vals, min_inds = paddle._C_ops.min_with_index(x, 1, False, True) + max_vals, max_inds = paddle._C_ops.max_with_index(x, 2, True, True) + return min_vals + max_vals.squeeze(), min_inds * max_inds + + +class MinMaxWithIndexOpRawInferShapeTest(TestBase): + def prepare_data(self): + self.cases = [np.random.rand(4, 5, 6), np.random.rand(3, 7, 1, 2)] + self.expected = [ + [ + 'shape[], data[NULL]', + 'shape[1, 1, 1], data[NULL]', + ], + ['shape[], data[NULL]', 'shape[1, 1, 1, 1], data[NULL]'], + ] + + @unittest.skipIf( + not paddle.core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", + ) + def test_eval_symbolic(self): + net = MinMaxWithIndexRawNet() 
+ + for i in range(len(self.cases)): + x = self.cases[i] + x_spec = InputSpec( + shape=[None for index in range(len(x.shape))], dtype='float32' + ) + input_spec = [x_spec] + net = apply_to_static(net, False, input_spec) + net.eval() + check_infer_results( + net, input_spec, 'builtin.shadow_output', self.expected[i] + ) + + return True + + +if __name__ == "__main__": + unittest.main() diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py index d043b82fcea426..0354e72a3759b9 100644 --- a/test/legacy_test/test_compat_minmax.py +++ b/test/legacy_test/test_compat_minmax.py @@ -490,6 +490,62 @@ def test_static_graph(self): self._compare_with_origin_static([3, 10, 2], 0, keepdim=True) self._compare_with_origin_static([17], 0) + @unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", + ) + def test_static_unary_shape_infer_1(self): + # min/max with index is a GPU only op, no need for testing if there is no GPU + + @paddle.jit.to_static(full_graph=True) + def static_func1(x): + y = paddle.zeros([2, 3, 4]) + return paddle._C_ops.min_with_index(y, x.shape[0], False, False) + + @paddle.jit.to_static(full_graph=True) + def static_func2(x): + y = paddle.zeros([2, 3, 4]) + return paddle._C_ops.min_with_index(y, x.shape[0], True, False) + + input_ts1 = paddle.to_tensor([1]) + input_ts2 = paddle.to_tensor([1, 2]) + val1, ind1 = static_func1(input_ts1) + val2, ind2 = static_func2(input_ts2) + + self.assertEqual(val1.shape, [2, 4]) + self.assertEqual(ind1.shape, [2, 4]) + self.assertEqual(val2.shape, [2, 3, 1]) + self.assertEqual(ind2.shape, [2, 3, 1]) + + @unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", + ) + def test_static_unary_shape_infer_2(self): + # min/max with index is a GPU only op, no need for testing if there is no GPU + + @paddle.jit.to_static(full_graph=True) + def static_func1(x): + dim = paddle.arange(0, 1).shape[0] + y = paddle.zeros([2, 3, 4]) + return paddle._C_ops.max_with_index(y, dim, False, True) + + @paddle.jit.to_static(full_graph=True) + def static_func2(x): + dim = paddle.arange(0, 2).shape[0] + y = paddle.zeros([2, 3, 4]) + return paddle._C_ops.max_with_index(y, dim, True, True) + + x1 = paddle.to_tensor([1]) + x2 = paddle.to_tensor([1, 2]) + val1, ind1 = static_func1(x1) + val2, ind2 = static_func2(x2) + + self.assertEqual(val1.shape, []) + self.assertEqual(ind1.shape, []) + self.assertEqual(val2.shape, [1, 1, 1]) + self.assertEqual(ind2.shape, [1, 1, 1]) + class TestCompatMax(TestCompatMinMaxBase): def __init__(self, *args, **kwargs): diff --git a/test/legacy_test/test_zero_dim_sundry_dygraph_api.py b/test/legacy_test/test_zero_dim_sundry_dygraph_api.py index b8debdcab006bb..29d3c5961d6241 100644 --- a/test/legacy_test/test_zero_dim_sundry_dygraph_api.py +++ b/test/legacy_test/test_zero_dim_sundry_dygraph_api.py @@ -600,6 +600,43 @@ def _make_compat_minmax_test(self, func_name): val.backward() self.assertEqual(x.grad.shape, [4, 5]) + def test_minmax_with_index(self): + # min/max_with_index is a GPU only op + if not paddle.is_compiled_with_cuda(): + return + # 1) x is 0D + x = paddle.to_tensor(1) + val1, ind1 = paddle._C_ops.min_with_index(x, 0, False, True) + + self.assertEqual(val1.shape, []) + self.assertEqual(ind1.shape, []) + np.testing.assert_allclose(val1, 1) + np.testing.assert_allclose(ind1, 0) + + # 2) x is 1D + x = paddle.to_tensor([1, 1, 1]) + val1, ind1 = paddle._C_ops.max_with_index(x, 0, False, True) + + self.assertEqual(val1.shape, 
[]) + self.assertEqual(ind1.shape, []) + np.testing.assert_allclose(val1, 1) + np.testing.assert_allclose(ind1, 0) + + # 3) x is 2D + x = paddle.zeros([2, 3]) + val1, ind1 = paddle._C_ops.min_with_index(x, 1, False, True) + val2, ind2 = paddle._C_ops.max_with_index(x, 1, True, True) + + self.assertEqual(val1.shape, []) + self.assertEqual(ind1.shape, []) + np.testing.assert_allclose(val1, 0) + np.testing.assert_allclose(ind1, 0) + + self.assertEqual(val2.shape, [1, 1]) + self.assertEqual(ind2.shape, [1, 1]) + np.testing.assert_allclose(val2, 0) + np.testing.assert_allclose(ind2, 0) + def test_compat_min(self): self._make_compat_minmax_test(paddle.compat.min)
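
For quick reference, a minimal dygraph sketch of the behavior the new tests assert is shown below. The `values`/`indices` fields of the returned named tuple and the chunk-size meaning of the int argument to `paddle.compat.split` follow the diffs in this series; the tensor shapes and variable names are illustrative only.

    import paddle

    x = paddle.arange(24, dtype=paddle.float32).reshape([4, 6])

    # paddle.compat.split: an int is the chunk size along `dim`,
    # and the last chunk may be smaller.
    a, b = paddle.compat.split(x, 4, dim=1)             # shapes [4, 4] and [4, 2]

    # paddle.split: an int is the number of equal sections along `axis`.
    parts = paddle.split(x, num_or_sections=3, axis=1)  # three [4, 2] tensors

    # paddle.compat.min/max: with a dim, a (values, indices) named tuple is
    # returned and the indices do not require gradients.
    ret = paddle.compat.min(x, dim=1)
    print(ret.values.shape, ret.indices.shape)          # [4] [4]

    # keepdim=True keeps the reduced axis with size 1; with no dim, only the
    # global extremum is returned as a 0-D tensor.
    vals, inds = paddle.compat.max(x, dim=0, keepdim=True)  # [1, 6] and [1, 6]
    print(paddle.compat.min(x).shape)                    # []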