Commit 1f2b8c1

add argsort op and delete unnecessary file, test=develop (#5740)

fix linspace and argsort bugs, test=develop; fix argsort and add 2-rank input reduce_max && reduce_min

1 parent d73b69b, commit 1f2b8c1

14 files changed, +461 -3 lines

lite/backends/arm/math/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -126,6 +126,7 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
       beam_search.cc
       reduce_max.cc
       reduce_min.cc
+      reduce_max_min.cc
       sequence_pool.cc
       sequence_pool_grad.cc
       sequence_expand.cc

lite/backends/arm/math/funcs.h

Lines changed: 1 addition & 0 deletions
@@ -54,6 +54,7 @@
 #include "lite/backends/arm/math/prior_box.h"
 #include "lite/backends/arm/math/quantize.h"
 #include "lite/backends/arm/math/reduce_max.h"
+#include "lite/backends/arm/math/reduce_max_min.h"
 #include "lite/backends/arm/math/reduce_mean.h"
 #include "lite/backends/arm/math/reduce_min.h"
 #include "lite/backends/arm/math/reduce_prod.h"

lite/backends/arm/math/reduce_max_min.cc

Lines changed: 65 additions & 0 deletions

@@ -0,0 +1,65 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "lite/backends/arm/math/reduce_max_min.h"
+#include <utility>
+#include <vector>
+#include "lite/backends/arm/math/funcs.h"
+#include "lite/core/tensor.h"
+
+namespace paddle {
+namespace lite {
+namespace arm {
+namespace math {
+
+template <>
+void reduce_second_of_two<float>(const float* src,
+                                 float* dst,
+                                 int first_in,
+                                 int second_in,
+                                 MaxMinType max_min_selector) {
+  // max_min_selector == true, do reduce max; else do reduce min
+  for (int j = 0; j < second_in; j++) {
+    dst[j * first_in] = src[j * first_in];
+    for (int k = 1; k < first_in; k++) {
+      dst[j * first_in] = (src[j * first_in + k] <= dst[j * first_in]) ^
+                                  static_cast<bool>(max_min_selector)
+                              ? src[j * first_in + k]
+                              : dst[j * first_in];
+    }
+  }
+}
+
+template <>
+void reduce_first_of_two<float>(const float* src,
+                                float* dst,
+                                int first_in,
+                                int second_in,
+                                MaxMinType max_min_selector) {
+  // max_min_selector == true, do reduce max; else do reduce min
+  for (int j = 0; j < first_in; j++) {
+    dst[j] = src[j];
+    for (int k = 1; k < second_in; k++) {
+      dst[j] = (src[j + k * first_in] <= dst[j]) ^
+                       static_cast<bool>(max_min_selector)
+                   ? src[j + k * first_in]
+                   : dst[j];
+    }
+  }
+}
+
+}  // namespace math
+}  // namespace arm
+}  // namespace lite
+}  // namespace paddle
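
Note on the loop body above: a single XOR against max_min_selector turns one comparison into either a running max (kMax, i.e. true) or a running min (kMin, i.e. false). The following is a minimal standalone sketch, not part of the commit, that isolates this trick and checks it against std::max_element / std::min_element; every name in it is local to the sketch.

// Standalone illustration of the XOR-based max/min selection used in
// reduce_max_min.cc above. Not part of the commit.
#include <algorithm>
#include <cassert>
#include <vector>

enum class MaxMinType : bool { kMin = false, kMax = true };

float reduce_one(const float* src, int n, MaxMinType sel) {
  float best = src[0];
  for (int k = 1; k < n; ++k) {
    // sel == kMax: (src[k] <= best) ^ true  -> take src[k] only when it is larger.
    // sel == kMin: (src[k] <= best) ^ false -> take src[k] only when it is <= best.
    best = ((src[k] <= best) ^ static_cast<bool>(sel)) ? src[k] : best;
  }
  return best;
}

int main() {
  std::vector<float> v = {3.f, -1.f, 7.f, 7.f, 0.5f};
  assert(reduce_one(v.data(), static_cast<int>(v.size()), MaxMinType::kMax) ==
         *std::max_element(v.begin(), v.end()));
  assert(reduce_one(v.data(), static_cast<int>(v.size()), MaxMinType::kMin) ==
         *std::min_element(v.begin(), v.end()));
  return 0;
}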

lite/backends/arm/math/reduce_max_min.h

Lines changed: 40 additions & 0 deletions

@@ -0,0 +1,40 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+namespace paddle {
+namespace lite {
+namespace arm {
+namespace math {
+
+enum class MaxMinType : bool { kMin = false, kMax = true };
+template <typename DataType>
+void reduce_first_of_two(const float* src,
+                         float* dst,
+                         int first_in,
+                         int second_in,
+                         MaxMinType compare_functor);
+
+template <typename DataType>
+void reduce_second_of_two(const float* src,
+                          float* dst,
+                          int first_in,
+                          int second_in,
+                          MaxMinType max_min_selector);
+
+}  // namespace math
+}  // namespace arm
+}  // namespace lite
+}  // namespace paddle

lite/kernels/arm/reduce_max_compute.cc

Lines changed: 31 additions & 2 deletions
@@ -13,7 +13,9 @@
 // limitations under the License.

 #include "lite/kernels/arm/reduce_max_compute.h"
+
 #include <string>
+
 #include "lite/backends/arm/math/funcs.h"

 namespace paddle {
@@ -104,9 +106,36 @@ void ReduceMaxCompute::Run() {
     } else {
       LOG(FATAL) << "dim's size over than 2, which is not supported now!!";
     }
+  } else if (x_dims.size() == 2) {
+    int first_in = x_dims[0];
+    int second_in = x_dims[1];
+    if (dim.size() == 1) {
+      switch (dim[0]) {
+        case 0:
+          lite::arm::math::reduce_first_of_two<float>(
+              input,
+              output,
+              first_in,
+              second_in,
+              lite::arm::math::MaxMinType::kMax);
+          break;
+        case 1:
+          lite::arm::math::reduce_second_of_two<float>(
+              input,
+              output,
+              first_in,
+              second_in,
+              lite::arm::math::MaxMinType::kMax);
+          break;
+        default:
+          LOG(FATAL) << "error!!!";
+      }
+    } else {
+      LOG(FATAL) << "dim's size over than 1, which is not supported now!!";
+    }  // x_dims == 2 && dim.size() == 1
   } else {
-    LOG(FATAL) << "only support input with 3&4 dimensions now!!";
-  }
+    LOG(FATAL) << "only support input with 2&3&4 dimensions now!!";
+  }  // x_dims == 2
 }

 }  // namespace arm
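
For a 2-D input of shape [first_in, second_in], the new branch dispatches dim = {0} to reduce_first_of_two and dim = {1} to reduce_second_of_two, both with MaxMinType::kMax. The sketch below shows the intended axis-wise semantics on a row-major matrix; it is a plain reference written for this note (the helper names are hypothetical), not the ARM routines themselves.

// Reference behaviour of reduce_max over one axis of a row-major
// [rows, cols] matrix; written for illustration only.
#include <algorithm>
#include <cstdio>
#include <vector>

// dim == 0: collapse rows, one max per column.
std::vector<float> reduce_max_dim0(const std::vector<float>& x, int rows, int cols) {
  std::vector<float> out(cols);
  for (int c = 0; c < cols; ++c) {
    out[c] = x[c];
    for (int r = 1; r < rows; ++r) out[c] = std::max(out[c], x[r * cols + c]);
  }
  return out;
}

// dim == 1: collapse columns, one max per row.
std::vector<float> reduce_max_dim1(const std::vector<float>& x, int rows, int cols) {
  std::vector<float> out(rows);
  for (int r = 0; r < rows; ++r) {
    out[r] = x[r * cols];
    for (int c = 1; c < cols; ++c) out[r] = std::max(out[r], x[r * cols + c]);
  }
  return out;
}

int main() {
  // x = [[1, 5, 2],
  //      [4, 0, 6]]
  std::vector<float> x = {1, 5, 2, 4, 0, 6};
  auto d0 = reduce_max_dim0(x, 2, 3);  // expects {4, 5, 6}
  auto d1 = reduce_max_dim1(x, 2, 3);  // expects {5, 6}
  for (float v : d0) std::printf("%g ", v);
  std::printf("| ");
  for (float v : d1) std::printf("%g ", v);
  std::printf("\n");
  return 0;
}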

lite/kernels/arm/reduce_min_compute.cc

Lines changed: 25 additions & 0 deletions
@@ -104,6 +104,31 @@ void ReduceMinCompute::Run() {
     } else {
       LOG(FATAL) << "dim's size over than 2, which is not supported now!!";
     }
+  } else if (x_dims.size() == 2) {
+    int first_in = x_dims[0];
+    int second_in = x_dims[1];
+    if (dim.size() == 1) {
+      switch (dim[0]) {
+        case 0:
+          lite::arm::math::reduce_first_of_two<float>(
+              input,
+              output,
+              first_in,
+              second_in,
+              lite::arm::math::MaxMinType::kMin);
+          break;
+        case 1:
+          lite::arm::math::reduce_second_of_two<float>(
+              input,
+              output,
+              first_in,
+              second_in,
+              lite::arm::math::MaxMinType::kMin);
+          break;
+        default:
+          LOG(FATAL) << "error!!!";
+      }
+    }
   } else {
     LOG(FATAL) << "only support input with 3&4 dimensions now!!";
   }
lite/kernels/host/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -64,6 +64,7 @@ add_kernel(fill_any_like_compute_host Host extra SRCS fill_any_like_compute.cc D
 add_kernel(meshgrid_compute_host Host extra SRCS meshgrid_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(linspace_compute_host Host extra SRCS linspace_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(tril_triu_compute_host Host extra SRCS tril_triu_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(argsort Host extra SRCS argsort_compute.cc DEPS ${lite_kernel_deps})

 if(LITE_BUILD_EXTRA AND LITE_WITH_x86)
   lite_cc_test(test_where_index_compute_host SRCS where_index_compute.cc DEPS where_index_compute_host)

lite/kernels/host/argsort_compute.cc

Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/host/argsort_compute.h"
+
+using argsort_fp32_compute = paddle::lite::kernels::host::ArgsortCompute<float>;
+REGISTER_LITE_KERNEL(
+    argsort, kHost, kFloat, kAny, argsort_fp32_compute, argsort_fp32)
+    .BindInput("X",
+               {LiteType::GetTensorTy(TARGET(kHost),
+                                      PRECISION(kFloat),
+                                      DATALAYOUT(kAny))})
+    .BindOutput("Indices",
+                {LiteType::GetTensorTy(TARGET(kHost),
+                                       PRECISION(kInt64),
+                                       DATALAYOUT(kAny))})
+    .BindOutput("Out",
+                {LiteType::GetTensorTy(TARGET(kHost),
+                                       PRECISION(kFloat),
+                                       DATALAYOUT(kAny))})
+    .Finalize();
+
+using argsort_int32_compute =
+    paddle::lite::kernels::host::ArgsortCompute<int32_t>;
+REGISTER_LITE_KERNEL(
+    argsort, kHost, kFloat, kAny, argsort_int32_compute, argsort_int32)
+    .BindInput("X",
+               {LiteType::GetTensorTy(TARGET(kHost),
+                                      PRECISION(kInt32),
+                                      DATALAYOUT(kAny))})
+    .BindOutput("Indices",
+                {LiteType::GetTensorTy(TARGET(kHost),
+                                       PRECISION(kInt64),
+                                       DATALAYOUT(kAny))})
+    .BindOutput("Out",
+                {LiteType::GetTensorTy(TARGET(kHost),
+                                       PRECISION(kInt32),
+                                       DATALAYOUT(kAny))})
+    .Finalize();
+
+using argsort_int64_compute =
+    paddle::lite::kernels::host::ArgsortCompute<int64_t>;
+REGISTER_LITE_KERNEL(
+    argsort, kHost, kFloat, kAny, argsort_int64_compute, argsort_int64)
+    .BindInput("X",
+               {LiteType::GetTensorTy(TARGET(kHost),
+                                      PRECISION(kInt64),
+                                      DATALAYOUT(kAny))})
+    .BindOutput("Indices",
+                {LiteType::GetTensorTy(TARGET(kHost),
+                                       PRECISION(kInt64),
+                                       DATALAYOUT(kAny))})
+    .BindOutput("Out",
+                {LiteType::GetTensorTy(TARGET(kHost),
+                                       PRECISION(kInt64),
+                                       DATALAYOUT(kAny))})
+    .Finalize();

lite/kernels/host/argsort_compute.h

Lines changed: 89 additions & 0 deletions
@@ -0,0 +1,89 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lite/core/kernel.h"
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace host {
+
+template <typename DataType>
+class ArgsortCompute
+    : public KernelLite<TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny)> {
+ public:
+  using param_t = operators::ArgsortParam;
+
+  void Run() {
+    auto& param = Param<operators::ArgsortParam>();
+    const DataType* x_data = param.X->template data<DataType>();
+    DataType* out_val = param.Out->template mutable_data<DataType>();
+    auto out_ind = param.Indices->template mutable_data<int64_t>();
+    DDim x_dims = param.X->dims();
+    int axis = param.axis;
+    int dim_size = x_dims.size();
+    bool descending = param.descending;
+    if (axis < 0) {
+      axis += dim_size;
+    }
+
+    int outer_size = x_dims.count(0, axis);
+    int axis_size = x_dims[axis];
+    int inner_size = x_dims.count(axis + 1, dim_size);
+    int sort_size = axis_size * inner_size;
+#pragma omp parallel for
+    for (int n = 0; n < outer_size; n++) {
+      const DataType* in_data = x_data + n * sort_size;
+      DataType* out_data = out_val + n * sort_size;
+      int64_t* out_ind_data = out_ind + n * sort_size;
+      for (int i = 0; i < inner_size; i++) {
+        std::vector<std::pair<DataType, int>> vec;
+        vec.resize(axis_size);
+        for (int j = 0; j < axis_size; j++) {
+          vec[j] = std::make_pair(in_data[j * inner_size + i], j);
+        }
+        if (descending) {
+          std::sort(vec.begin(),
+                    vec.end(),
+                    [](std::pair<DataType, int> a, std::pair<DataType, int> b) {
+                      return a.first > b.first;
+                    });
+        } else {
+          std::sort(vec.begin(),
+                    vec.end(),
+                    [](std::pair<DataType, int> a, std::pair<DataType, int> b) {
+                      return a.first < b.first;
+                    });
+        }
+        for (int j = 0; j < axis_size; j++) {
+          out_data[j * inner_size + i] = vec[j].first;
+          out_ind_data[j * inner_size + i] = vec[j].second;
+        }
+      }
+    }
+  }
+
+  virtual ~ArgsortCompute() = default;
+};
+
+}  // namespace host
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
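
The kernel above flattens X into outer_size x axis_size x inner_size blocks, sorts each length-axis_size column of (value, original index) pairs along the chosen axis, and writes the sorted values to Out and the original positions to Indices (always int64). The following is a minimal standalone sketch of the same decomposition with the Lite tensor machinery replaced by raw vectors; the argsort function below is written for this note and is not the kernel's API.

// Standalone sketch of axis-wise argsort using the same
// outer/axis/inner index decomposition as ArgsortCompute::Run().
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

void argsort(const std::vector<float>& x, const std::vector<int>& dims, int axis,
             bool descending, std::vector<float>* out, std::vector<int64_t>* indices) {
  int outer = 1, inner = 1;
  for (int i = 0; i < axis; ++i) outer *= dims[i];
  for (int i = axis + 1; i < static_cast<int>(dims.size()); ++i) inner *= dims[i];
  const int axis_size = dims[axis];
  out->resize(x.size());
  indices->resize(x.size());
  for (int n = 0; n < outer; ++n) {
    for (int i = 0; i < inner; ++i) {
      // Gather one column along the sort axis together with its source positions.
      std::vector<std::pair<float, int>> col(axis_size);
      for (int j = 0; j < axis_size; ++j)
        col[j] = {x[(n * axis_size + j) * inner + i], j};
      std::sort(col.begin(), col.end(),
                [descending](const std::pair<float, int>& a,
                             const std::pair<float, int>& b) {
                  return descending ? a.first > b.first : a.first < b.first;
                });
      for (int j = 0; j < axis_size; ++j) {
        (*out)[(n * axis_size + j) * inner + i] = col[j].first;
        (*indices)[(n * axis_size + j) * inner + i] = col[j].second;
      }
    }
  }
}

int main() {
  // 2x3 input, axis = 1, ascending:
  // [[3, 1, 2],   Out  [[1, 2, 3],   Indices [[1, 2, 0],
  //  [0, 5, 4]]         [0, 4, 5]]            [0, 2, 1]]
  std::vector<float> x = {3, 1, 2, 0, 5, 4};
  std::vector<float> vals;
  std::vector<int64_t> idx;
  argsort(x, {2, 3}, 1, false, &vals, &idx);
  for (size_t k = 0; k < vals.size(); ++k)
    std::printf("%g/%lld ", vals[k], static_cast<long long>(idx[k]));
  std::printf("\n");
  return 0;
}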

lite/operators/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -146,6 +146,7 @@ add_operator(tile_op extra SRCS tile_op.cc DEPS ${op_DEPS})
 add_operator(meshgrid_op_lite extra SRCS meshgrid_op.cc DEPS ${op_DEPS})
 add_operator(linspace_op extra SRCS linspace_op.cc DEPS ${op_DEPS})
 add_operator(tril_triu_op extra SRCS tril_triu_op.cc DEPS ${op_DEPS})
+add_operator(argsort_op extra SRCS argsort_op.cc DEPS ${op_DEPS})

 # for OCR specific
 add_operator(while_op extra SRCS while_op.cc DEPS ${op_DEPS})
