Merged

Commits (43)
7d58b91
Merge pull request #1 from PaddlePaddle/develop
AnnaTrainingG Mar 25, 2021
1021e08
Merge pull request #2 from PaddlePaddle/develop
AnnaTrainingG Mar 29, 2021
43f53fe
Merge pull request #3 from PaddlePaddle/develop
AnnaTrainingG Apr 19, 2021
d25ab26
Merge pull request #4 from PaddlePaddle/develop
AnnaTrainingG May 7, 2021
a244f18
max_min_prod_all_any
May 18, 2021
af4db5d
Update reduce_any_op.cu
AnnaTrainingG May 24, 2021
d804066
modified
AnnaTrainingG May 24, 2021
c7826e8
Merge branch 'reduce_max_min_prod_all_any' of https://github.com/niul…
AnnaTrainingG May 24, 2021
6ea9e9a
copyright
AnnaTrainingG May 24, 2021
8c8717f
Merge pull request #5 from PaddlePaddle/develop
AnnaTrainingG May 25, 2021
ff0a6e9
modified and {} for loop
AnnaTrainingG May 25, 2021
7ddaf91
max_min_prod_all_any
May 18, 2021
a43af7d
Update reduce_any_op.cu
AnnaTrainingG May 24, 2021
0a70b82
modified
AnnaTrainingG May 24, 2021
c91b26b
copyright
AnnaTrainingG May 24, 2021
54651e0
modified and {} for loop
AnnaTrainingG May 25, 2021
37fbd4c
Merge branch 'reduce_max_min_prod_all_any' of https://github.com/niul…
May 25, 2021
35411f7
add notes for reduce_op.cuh
May 25, 2021
8cea954
update
May 25, 2021
a719c3c
update
May 25, 2021
2e8ad8f
update
May 25, 2021
a60b90a
fix a bug in reduce_Op.cuh
AnnaTrainingG May 27, 2021
4bd9644
reset reduce_any and reduce_all
May 28, 2021
bf701a2
delete __forceinline__ in reduce_functor_op.h
May 31, 2021
6174b50
from DEVICE to HOSTTDEVICE
May 31, 2021
59c32d6
add DataBound struct for reduce_max and reduce_min
AnnaTrainingG Jun 1, 2021
790173a
Update reduce_functor_op.h
AnnaTrainingG Jun 1, 2021
8700894
update TensorReduceFunc
AnnaTrainingG Jun 2, 2021
9e32b0f
add reduce_functor_op.h pragma once
Jun 3, 2021
17dcaf8
update BOUND and kMaxTHread
AnnaTrainingG Jun 7, 2021
cb2b619
modified max min prod for cu.h
AnnaTrainingG Jun 9, 2021
6541ffb
update for struct
AnnaTrainingG Jun 9, 2021
719e435
code style reduce_op.cu.h
AnnaTrainingG Jun 9, 2021
5045a49
device to HOSTDEVICE
AnnaTrainingG Jun 10, 2021
a5dedb1
Merge branch 'reduce_max_min_prod_all_any' of https://github.com/niul…
AnnaTrainingG Jun 10, 2021
fb69e3d
ReduceCudaKernel
AnnaTrainingG Jun 15, 2021
24633a5
Merge pull request #15 from PaddlePaddle/develop
AnnaTrainingG Jun 15, 2021
b841b34
REDUCE_SPLIT_BOUNDARY
AnnaTrainingG Jun 15, 2021
1fda4d5
Update reduce_op.cu.h
AnnaTrainingG Jun 15, 2021
c85ca05
rename reduceTensorFunctor
AnnaTrainingG Jun 16, 2021
9cc8ac3
rename TensorReduceFunc
AnnaTrainingG Jun 16, 2021
140779d
delete HOSTDEVICE
AnnaTrainingG Jun 17, 2021
fa3411c
add left_num * grid.z * grid.y
AnnaTrainingG Jun 17, 2021
84 changes: 68 additions & 16 deletions paddle/fluid/operators/reduce_ops/reduce_functor_op.h
@@ -13,46 +13,98 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
-#include <string>
-#include <vector>
+#include <math.h>
+#include <limits>
 
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/operators/amp/fp16_type_traits.h"
+#include "paddle/fluid/platform/hostdevice.h"
 #include "paddle/fluid/platform/macros.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
+#ifdef __HIPCC__
+#include <hip/hip_runtime.h>
+#endif
 
 namespace paddle {
 namespace operators {
 
-template <typename T>
+template <typename Tx, typename Ty = Tx>
 struct CustomMin {
-  __device__ __forceinline__ T operator()(const T &a, const T &b) const {
+  using Transformer = detail::IdentityFunctor<Tx>;
+
+  HOSTDEVICE __forceinline__ Ty initial() {
+    return std::numeric_limits<Ty>::max();
+  }
+
+  __device__ __forceinline__ Ty operator()(const Ty &a, const Ty &b) const {
     return (b < a) ? b : a;
   }
 };
 
-template <typename T>
+template <typename Tx, typename Ty = Tx>
 struct CustomMax {
-  __device__ __forceinline__ T operator()(const T &a, const T &b) const {
+  using Transformer = detail::IdentityFunctor<Tx>;
+
+  HOSTDEVICE __forceinline__ Ty initial() {
+    return std::numeric_limits<Ty>::lowest();
+  }
+
+  __device__ __forceinline__ Ty operator()(const Ty &a, const Ty &b) const {
     return (b > a) ? b : a;
   }
 };
 
-template <typename T>
+// for cub::Reduce
+template <typename Tx, typename Ty = Tx>
 struct CustomSum {
-  __device__ __forceinline__ T operator()(const T &a, const T &b) const {
+  using Transformer = detail::IdentityFunctor<Tx, Ty>;
+
+  HOSTDEVICE __forceinline__ Ty initial() { return static_cast<Ty>(0.0f); }
+
+  __device__ __forceinline__ Ty operator()(const Ty &a, const Ty &b) const {
     return b + a;
   }
 };
 
+template <typename Tx, typename Ty = Tx>
+struct CustomMean {
+  using Transformer = detail::DivideFunctor<Tx>;
+
+  HOSTDEVICE __forceinline__ Ty initial() { return static_cast<Ty>(0.0f); }
+
+  __device__ __forceinline__ Ty operator()(const Ty &a, const Ty &b) const {
+    return b + a;
+  }
+};
+
-template <typename T>
+template <typename Tx, typename Ty = Tx>
 struct CustomMul {
-  __device__ __forceinline__ T operator()(const T &a, const T &b) const {
+  using Transformer = detail::IdentityFunctor<Tx>;
+
+  HOSTDEVICE __forceinline__ Ty initial() { return static_cast<Ty>(1.0f); }
+
+  __device__ __forceinline__ Ty operator()(const Ty &a, const Ty &b) const {
     return b * a;
   }
 };
 
+template <typename Tx, typename Ty = Tx>
+struct CustomLogicalOr {
+  using Transformer = detail::IdentityFunctor<Tx>;
+
+  HOSTDEVICE __forceinline__ Ty initial() { return static_cast<Ty>(false); }
+
+  __device__ __forceinline__ Ty operator()(const Ty &a, const Ty &b) const {
+    return b || a;
+  }
+};
+
+template <typename Tx, typename Ty = Tx>
+struct CustomLogicalAnd {
+  using Transformer = detail::IdentityFunctor<Tx>;
+
+  HOSTDEVICE __forceinline__ Ty initial() { return static_cast<Ty>(true); }
+
+  __device__ __forceinline__ Ty operator()(const Ty &a, const Ty &b) const {
+    return b && a;
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
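The new functor contract has three parts: a Transformer type that preprocesses each input element (detail::IdentityFunctor passes values through), an initial() that supplies the identity element of the reduction, and a binary operator() that folds two partial results. Below is a minimal, self-contained CUDA sketch of how such a functor can drive a single-block reduction; MyMax and BlockReduce are illustrative stand-ins, not the kernel this PR touches in reduce_op.cu.h, and the Transformer step is omitted for brevity.

#include <cfloat>
#include <cstdio>

// Stand-in functor with the same shape as CustomMax above. initial()
// returns -FLT_MAX directly to keep the sketch self-contained; the PR
// uses std::numeric_limits<Ty>::lowest().
struct MyMax {
  __host__ __device__ float initial() const { return -FLT_MAX; }
  __device__ float operator()(const float &a, const float &b) const {
    return (b > a) ? b : a;
  }
};

// Single-block tree reduction; assumes blockDim.x is a power of two <= 256.
template <typename ReduceOp>
__global__ void BlockReduce(const float *in, float *out, int n, ReduceOp op) {
  __shared__ float smem[256];
  float acc = op.initial();  // seed every thread with the identity element
  for (int i = threadIdx.x; i < n; i += blockDim.x) {
    acc = op(acc, in[i]);  // fold the elements owned by this thread
  }
  smem[threadIdx.x] = acc;
  __syncthreads();
  for (int s = blockDim.x / 2; s > 0; s >>= 1) {  // pairwise tree combine
    if (threadIdx.x < s) {
      smem[threadIdx.x] = op(smem[threadIdx.x], smem[threadIdx.x + s]);
    }
    __syncthreads();
  }
  if (threadIdx.x == 0) {
    *out = smem[0];  // thread 0 holds the block-wide result
  }
}

Launched as BlockReduce<<<1, 256>>>(d_in, d_out, n, MyMax{}), this computes the maximum of n device floats; swapping in a CustomMin-shaped functor changes only the identity element and the fold, which is exactly the flexibility the header above is after.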
20 changes: 9 additions & 11 deletions paddle/fluid/operators/reduce_ops/reduce_max_op.cu
@@ -11,15 +11,13 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
 
-#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h"
-
-REGISTER_OP_CUDA_KERNEL(reduce_max,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          float, ops::MaxFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          double, ops::MaxFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          int, ops::MaxFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          int64_t, ops::MaxFunctor>);
+// reduce_max
+REGISTER_OP_CUDA_KERNEL(
+    reduce_max, ops::ReduceCudaKernel<float, paddle::operators::CustomMax>,
+    ops::ReduceCudaKernel<double, paddle::operators::CustomMax>,
+    ops::ReduceCudaKernel<int, paddle::operators::CustomMax>,
+    ops::ReduceCudaKernel<int64_t, paddle::operators::CustomMax>);
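One detail of the new registration is worth noting: CustomMax is passed with no template arguments, which implies ReduceCudaKernel accepts the functor as a template-template parameter and instantiates it once per element type. A hedged host-side sketch of that shape follows (ReduceCudaKernelSketch, MaxOpSketch, and the Run method are assumptions for illustration, not the actual class in reduce_op.cu.h):

#include <cstdio>

// Stand-in functor template with the CustomMax shape.
template <typename Tx, typename Ty = Tx>
struct MaxOpSketch {
  Ty initial() const { return static_cast<Ty>(-1e30); }
  Ty operator()(const Ty &a, const Ty &b) const { return (b > a) ? b : a; }
};

// The functor template itself is the second argument, so each registration
// line instantiates the matching functor specialization for one data type.
template <typename T, template <typename Tx, typename Ty = Tx> class ReduceOp>
struct ReduceCudaKernelSketch {
  T Run(const T *data, int n) const {
    ReduceOp<T> op;  // e.g. CustomMax<float> in the real kernel
    T acc = op.initial();
    for (int i = 0; i < n; ++i) acc = op(acc, data[i]);
    return acc;
  }
};

int main() {
  const float xs[] = {1.f, 5.f, 3.f};
  ReduceCudaKernelSketch<float, MaxOpSketch> kernel;
  printf("%f\n", kernel.Run(xs, 3));  // prints 5.000000
}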
20 changes: 9 additions & 11 deletions paddle/fluid/operators/reduce_ops/reduce_min_op.cu
@@ -11,15 +11,13 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
 
-#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h"
-
-REGISTER_OP_CUDA_KERNEL(reduce_min,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          float, ops::MinFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          double, ops::MinFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          int, ops::MinFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          int64_t, ops::MinFunctor>);
+// reduce_min
+REGISTER_OP_CUDA_KERNEL(
+    reduce_min, ops::ReduceCudaKernel<float, paddle::operators::CustomMin>,
+    ops::ReduceCudaKernel<double, paddle::operators::CustomMin>,
+    ops::ReduceCudaKernel<int, paddle::operators::CustomMin>,
+    ops::ReduceCudaKernel<int64_t, paddle::operators::CustomMin>);
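reduce_min mirrors reduce_max exactly, only swapping in CustomMin. One last design point from the functor header: CustomMean keeps the same sum-style operator() as CustomSum and gets the averaging entirely from its Transformer, detail::DivideFunctor, which by its name scales each element before the fold. A host-side sketch of that idea under that assumption (DivideSketch and MeanSketch are stand-ins, not Paddle's classes):

#include <cstdio>

// Stand-in for detail::DivideFunctor: pre-scales each element by 1/n so
// that a plain sum over the transformed elements yields the mean.
struct DivideSketch {
  explicit DivideSketch(int n) : inv_n_(1.0f / static_cast<float>(n)) {}
  float operator()(float x) const { return x * inv_n_; }
  float inv_n_;
};

float MeanSketch(const float *data, int n) {
  DivideSketch transform(n);  // plays the role of CustomMean::Transformer
  float acc = 0.0f;           // CustomMean::initial()
  for (int i = 0; i < n; ++i) {
    acc = acc + transform(data[i]);  // transform, then the sum-style fold
  }
  return acc;
}

int main() {
  const float xs[] = {2.f, 4.f, 6.f};
  printf("%f\n", MeanSketch(xs, 3));  // prints 4.000000
}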