Skip to content

Commit 1948210

Browse files
author
Pei Yang
authored
Bug Fix: Paddle-TRT cannot handle adaptive pooling in pool2d op converter and "num" attribute in split op converter (#20733) (#20902)
* Fix pool2d TensorRT converter, test=develop
* Add fix for split op converter, test=develop
1 parent 6fb04e8 commit 1948210

File tree

11 files changed

+174
-66
lines changed

11 files changed

+174
-66
lines changed

paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
213213
for (auto *x : node->inputs) {
214214
if (x->IsVar() && x->Var()) {
215215
framework::VarDesc *var = x->Var();
216-
SetAttr(op_desc->Proto(), var->Name() + "_shape", var->GetShape());
216+
op_desc->SetAttr(var->Name() + "_shape", var->GetShape());
217217
}
218218
}
219219

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,6 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
507507
}
508508
}
509509
if (config.glog_info_disabled()) {
510-
google::InitGoogleLogging("Init");
511510
FLAGS_logtostderr = 1;
512511
FLAGS_minloglevel = google::WARNING;
513512
LOG(WARNING) << " - GLOG's LOG(INFO) is disabled.";

paddle/fluid/inference/tensorrt/convert/pool2d_op.cc

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

1515
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
16-
#include "paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.h"
16+
#include "paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h"
1717

1818
namespace paddle {
1919
namespace inference {
@@ -75,12 +75,19 @@ class Pool2dOpConverter : public OpConverter {
7575
std::vector<int> paddings =
7676
boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
7777
bool ceil_mode = boost::get<bool>(op_desc.GetAttr("ceil_mode"));
78+
bool adaptive = false;
79+
if (op_desc.HasAttr("adaptive"))
80+
adaptive = boost::get<bool>(op_desc.GetAttr("adaptive"));
7881

7982
nvinfer1::PoolingType nv_pool_type = nvinfer1::PoolingType::kMAX;
83+
plugin::PoolPlugin::PoolType plugin_pool_type =
84+
plugin::PoolPlugin::PoolType::max;
8085
if (pool_type == "max") {
8186
nv_pool_type = nvinfer1::PoolingType::kMAX;
87+
plugin_pool_type = plugin::PoolPlugin::PoolType::max;
8288
} else if (pool_type == "avg") {
8389
nv_pool_type = nvinfer1::PoolingType::kAVERAGE;
90+
plugin_pool_type = plugin::PoolPlugin::PoolType::avg;
8491
} else {
8592
PADDLE_THROW("TensorRT unsupported pooling type!");
8693
}
@@ -108,7 +115,7 @@ class Pool2dOpConverter : public OpConverter {
108115
return;
109116
}
110117

111-
if (pool_type == "max") {
118+
if (!adaptive && pool_type == "max") {
112119
// Under ceil mode, the pre_pad and post_pad are used to
113120
// record the padding size. In some ceil mode cases,
114121
// we do not need padding, so we initialize the two vars to 0.
@@ -141,10 +148,13 @@ class Pool2dOpConverter : public OpConverter {
141148
for (int i = 0; i < input_dims; i++) {
142149
input_shape_v.push_back(input_shape.d[i]);
143150
}
144-
plugin::AvgPoolPlugin *plugin = new plugin::AvgPoolPlugin(
145-
ceil_mode, ksize, strides, paddings, input_shape_v);
146-
auto *avg_pool_layer = engine_->AddPlugin(&input1, 1, plugin);
147-
layer = avg_pool_layer;
151+
plugin::PoolPlugin *plugin =
152+
new plugin::PoolPlugin(ceil_mode, plugin_pool_type, adaptive, ksize,
153+
strides, paddings, input_shape_v);
154+
PADDLE_ENFORCE_NOT_NULL(plugin->getPluginType(),
155+
"The plugin used must not be null");
156+
auto *pool_layer = engine_->AddPlugin(&input1, 1, plugin);
157+
layer = pool_layer;
148158
}
149159

150160
auto output_name = op_desc.Output("Out")[0];

paddle/fluid/inference/tensorrt/convert/split_op.cc

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,23 @@ class SplitOpConverter : public OpConverter {
3535
// Get Attrs
3636
PADDLE_ENFORCE(input_num == 1);
3737
int axis = boost::get<int>(op_desc.GetAttr("axis"));
38-
std::vector<int> output_lengths =
39-
boost::get<std::vector<int>>(op_desc.GetAttr("sections"));
4038
// split on batch is not supported in TensorRT
4139
PADDLE_ENFORCE(axis != 0);
4240
axis += (axis < 0) ? input_dims.nbDims : -1;
43-
41+
std::vector<int> output_lengths =
42+
boost::get<std::vector<int>>(op_desc.GetAttr("sections"));
43+
output_lengths.reserve(output_num);
44+
int num = boost::get<int>(op_desc.GetAttr("num"));
45+
if (num > 0) {
46+
int64_t in_axis_dim = input_dims.d[axis];
47+
PADDLE_ENFORCE_EQ(in_axis_dim % num, 0,
48+
"Tensor split does not result"
49+
" in an equal division");
50+
size_t out_axis_dim = in_axis_dim / num;
51+
for (size_t i = 0; i < output_num; ++i) {
52+
output_lengths.push_back(out_axis_dim);
53+
}
54+
}
4455
PADDLE_ENFORCE(output_lengths.size() == output_num);
4556
plugin::SplitPlugin* plugin = new plugin::SplitPlugin(axis, output_lengths);
4657
nvinfer1::IPluginLayer* layer =
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
nv_library(tensorrt_plugin
22
SRCS trt_plugin.cc split_op_plugin.cu elementwise_op_plugin.cu
33
prelu_op_plugin.cu trt_plugin_factory.cc
4-
avg_pool_op_plugin.cu swish_op_plugin.cu
4+
pool_op_plugin.cu swish_op_plugin.cu
55
DEPS enforce tensorrt_engine prelu)

paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.cu renamed to paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
#include "paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.h"
15+
#include "paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h"
1616
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h"
1717
#include "paddle/fluid/operators/math/pooling.h"
1818

@@ -21,14 +21,14 @@ namespace inference {
2121
namespace tensorrt {
2222
namespace plugin {
2323

24-
AvgPoolPlugin* CreateAvgPoolPluginDeserialize(const void* buffer,
25-
size_t length) {
26-
return new AvgPoolPlugin(buffer, length);
24+
PoolPlugin* CreatePoolPluginDeserialize(const void* buffer, size_t length) {
25+
return new PoolPlugin(buffer, length);
2726
}
28-
REGISTER_TRT_PLUGIN("avg_pool_plugin", CreateAvgPoolPluginDeserialize);
27+
REGISTER_TRT_PLUGIN("pool_plugin", CreatePoolPluginDeserialize);
2928

30-
nvinfer1::Dims AvgPoolPlugin::getOutputDimensions(
31-
int index, const nvinfer1::Dims* inputDims, int nbInputs) {
29+
nvinfer1::Dims PoolPlugin::getOutputDimensions(int index,
30+
const nvinfer1::Dims* inputDims,
31+
int nbInputs) {
3232
assert(nbInputs == 1);
3333
assert(index == 0);
3434
assert(inputDims[0].nbDims == 3);
@@ -41,26 +41,33 @@ nvinfer1::Dims AvgPoolPlugin::getOutputDimensions(
4141
return output_dims;
4242
}
4343

44-
int AvgPoolPlugin::enqueue(int batchSize, const void* const* inputs,
45-
void** outputs, void* workspace,
46-
cudaStream_t stream) {
44+
int PoolPlugin::enqueue(int batchSize, const void* const* inputs,
45+
void** outputs, void* workspace, cudaStream_t stream) {
4746
auto const& input_dims = this->getInputDims(0);
4847
int input_size = 0;
4948
float const* idata = reinterpret_cast<float const*>(inputs[0]);
5049
float** odatas = reinterpret_cast<float**>(outputs);
5150

52-
paddle::operators::math::AvgPool<float> pool_process;
53-
paddle::operators::math::Pool2dDirectCUDAFunctor<
54-
paddle::operators::math::AvgPool<float>, float>
55-
pool2d_forward;
56-
5751
std::vector<int> input_shape = input_shape_;
5852
std::vector<int> output_shape = output_shape_;
5953
input_shape.insert(input_shape.begin(), batchSize);
6054
output_shape.insert(output_shape.begin(), batchSize);
6155

62-
pool2d_forward(idata, input_shape, output_shape, ksize_, strides_, paddings_,
63-
pool_process, true, odatas[0], stream);
56+
if (pool_type_ == PoolType::max) {
57+
paddle::operators::math::MaxPool<float> pool_process;
58+
paddle::operators::math::Pool2dDirectCUDAFunctor<
59+
paddle::operators::math::MaxPool<float>, float>
60+
pool2d_forward;
61+
pool2d_forward(idata, input_shape, output_shape, ksize_, strides_,
62+
paddings_, pool_process, true, adaptive_, odatas[0], stream);
63+
} else if (pool_type_ == PoolType::avg) {
64+
paddle::operators::math::AvgPool<float> pool_process;
65+
paddle::operators::math::Pool2dDirectCUDAFunctor<
66+
paddle::operators::math::AvgPool<float>, float>
67+
pool2d_forward;
68+
pool2d_forward(idata, input_shape, output_shape, ksize_, strides_,
69+
paddings_, pool_process, true, adaptive_, odatas[0], stream);
70+
}
6471

6572
return cudaGetLastError() != cudaSuccess;
6673
}

paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.h renamed to paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h

Lines changed: 54 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
// limitations under the License.
1414

1515
#pragma once
16+
#include <stdio.h>
1617
#include <cassert>
18+
#include <string>
1719
#include <vector>
1820
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
1921

@@ -22,18 +24,11 @@ namespace inference {
2224
namespace tensorrt {
2325
namespace plugin {
2426

25-
class AvgPoolPlugin : public PluginTensorRT {
26-
private:
27-
bool ceil_mode_;
28-
std::vector<int> ksize_;
29-
std::vector<int> strides_;
30-
std::vector<int> paddings_;
31-
std::vector<int> input_shape_;
32-
std::vector<int> output_shape_;
33-
27+
class PoolPlugin : public PluginTensorRT {
3428
protected:
3529
size_t getSerializationSize() override {
3630
return SerializedSize(getPluginType()) + SerializedSize(ceil_mode_) +
31+
SerializedSize(pool_type_) + SerializedSize(adaptive_) +
3732
SerializedSize(ksize_) + SerializedSize(strides_) +
3833
SerializedSize(paddings_) + SerializedSize(input_shape_) +
3934
SerializedSize(output_shape_) + getBaseSerializationSize();
@@ -45,6 +40,8 @@ class AvgPoolPlugin : public PluginTensorRT {
4540
SerializeValue(&buffer, getPluginType());
4641
serializeBase(buffer);
4742
SerializeValue(&buffer, ceil_mode_);
43+
SerializeValue(&buffer, pool_type_);
44+
SerializeValue(&buffer, adaptive_);
4845
SerializeValue(&buffer, ksize_);
4946
SerializeValue(&buffer, strides_);
5047
SerializeValue(&buffer, paddings_);
@@ -53,60 +50,83 @@ class AvgPoolPlugin : public PluginTensorRT {
5350
}
5451

5552
public:
56-
AvgPoolPlugin() {}
57-
AvgPoolPlugin(bool ceil_mode, std::vector<int> ksize,
58-
std::vector<int> strides, std::vector<int> paddings,
59-
std::vector<int> input_shape)
53+
enum class PoolType {
54+
max = 0,
55+
avg,
56+
};
57+
PoolPlugin() {}
58+
PoolPlugin(bool ceil_mode, PoolType pool_type, bool adaptive,
59+
std::vector<int> ksize, std::vector<int> strides,
60+
std::vector<int> paddings, std::vector<int> input_shape)
6061
: ceil_mode_(ceil_mode),
62+
pool_type_(pool_type),
63+
adaptive_(adaptive),
6164
ksize_(ksize),
6265
strides_(strides),
6366
paddings_(paddings),
6467
input_shape_(input_shape) {
65-
int output_h, output_w;
6668
output_shape_ = input_shape_;
67-
if (!ceil_mode_) {
68-
output_h =
69-
(input_shape[1] - ksize_[0] + 2 * paddings_[0]) / strides_[0] + 1;
70-
output_w =
71-
(input_shape[2] - ksize_[1] + 2 * paddings_[1]) / strides_[1] + 1;
69+
if (adaptive_) {
70+
output_shape_[1] = ksize[0];
71+
output_shape_[2] = ksize[1];
7272
} else {
73-
output_h =
74-
(input_shape[1] - ksize_[0] + 2 * paddings_[0] + strides_[0] - 1) /
75-
strides_[0] +
76-
1;
77-
output_w =
78-
(input_shape[2] - ksize_[1] + 2 * paddings_[1] + strides_[1] - 1) /
79-
strides_[1] +
80-
1;
73+
int output_h, output_w;
74+
if (!ceil_mode_) {
75+
output_h =
76+
(input_shape[1] - ksize_[0] + 2 * paddings_[0]) / strides_[0] + 1;
77+
output_w =
78+
(input_shape[2] - ksize_[1] + 2 * paddings_[1]) / strides_[1] + 1;
79+
} else {
80+
output_h =
81+
(input_shape[1] - ksize_[0] + 2 * paddings_[0] + strides_[0] - 1) /
82+
strides_[0] +
83+
1;
84+
output_w =
85+
(input_shape[2] - ksize_[1] + 2 * paddings_[1] + strides_[1] - 1) /
86+
strides_[1] +
87+
1;
88+
}
89+
output_shape_[1] = output_h;
90+
output_shape_[2] = output_w;
8191
}
82-
output_shape_[1] = output_h;
83-
output_shape_[2] = output_w;
8492
}
8593

8694
// It was used for tensorrt deserialization.
8795
// It should not be called by users.
88-
AvgPoolPlugin(void const *serialData, size_t serialLength) {
96+
PoolPlugin(void const *serialData, size_t serialLength) {
8997
deserializeBase(serialData, serialLength);
9098
DeserializeValue(&serialData, &serialLength, &ceil_mode_);
99+
DeserializeValue(&serialData, &serialLength, &pool_type_);
100+
DeserializeValue(&serialData, &serialLength, &adaptive_);
91101
DeserializeValue(&serialData, &serialLength, &ksize_);
92102
DeserializeValue(&serialData, &serialLength, &strides_);
93103
DeserializeValue(&serialData, &serialLength, &paddings_);
94104
DeserializeValue(&serialData, &serialLength, &input_shape_);
95105
DeserializeValue(&serialData, &serialLength, &output_shape_);
96106
}
97107

98-
AvgPoolPlugin *clone() const override {
99-
return new AvgPoolPlugin(ceil_mode_, ksize_, strides_, paddings_,
100-
input_shape_);
108+
PoolPlugin *clone() const override {
109+
return new PoolPlugin(ceil_mode_, pool_type_, adaptive_, ksize_, strides_,
110+
paddings_, input_shape_);
101111
}
102112

103-
const char *getPluginType() const override { return "avg_pool_plugin"; }
113+
const char *getPluginType() const override { return "pool_plugin"; }
104114
int getNbOutputs() const override { return 1; }
105115
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims *inputs,
106116
int nbInputDims) override;
107117
int initialize() override { return 0; }
108118
int enqueue(int batchSize, const void *const *inputs, void **outputs,
109119
void *workspace, cudaStream_t stream) override;
120+
121+
private:
122+
bool ceil_mode_;
123+
PoolType pool_type_;
124+
bool adaptive_;
125+
std::vector<int> ksize_;
126+
std::vector<int> strides_;
127+
std::vector<int> paddings_;
128+
std::vector<int> input_shape_;
129+
std::vector<int> output_shape_;
110130
};
111131

112132
} // namespace plugin

paddle/fluid/inference/tests/api/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,10 @@ if(WITH_GPU AND TENSORRT_FOUND)
268268
if (NOT EXISTS ${TRT_MODEL_INSTALL_DIR})
269269
inference_download_and_uncompress(${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_inference_test_models.tar.gz")
270270
endif()
271+
set(TEST_SPLIT_CONVERTER_MODEL "${TRT_MODEL_INSTALL_DIR}/trt_split_op_converter_test")
272+
if (NOT EXISTS ${TEST_SPLIT_CONVERTER_MODEL})
273+
inference_download_and_uncompress(${TEST_SPLIT_CONVERTER_MODEL} ${INFERENCE_URL}/tensorrt_test "split_converter.tgz")
274+
endif()
271275
inference_analysis_test(trt_mobilenet_test SRCS trt_mobilenet_test.cc
272276
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
273277
ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
@@ -283,6 +287,9 @@ if(WITH_GPU AND TENSORRT_FOUND)
283287
inference_analysis_test(trt_cascade_rcnn_test SRCS trt_cascade_rcnn_test.cc
284288
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
285289
ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
290+
inference_analysis_test(trt_split_converter_test SRCS trt_split_converter_test.cc
291+
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
292+
ARGS --infer_model=${TEST_SPLIT_CONVERTER_MODEL}/)
286293
inference_analysis_test(test_analyzer_capi_gpu SRCS analyzer_capi_gpu_tester.cc
287294
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
288295
ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)

0 commit comments

Comments (0)