Skip to content

Commit 2f27c04

Browse files
authored
Merge pull request #14440 from hjchen2/develop
Add PRelu tensorRT plugin and Conv2d transpose op converter
2 parents d971d5b + 6a7b995 commit 2f27c04

File tree

14 files changed

+541
-79
lines changed

14 files changed

+541
-79
lines changed

paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ void IrAnalysisComposePass::InitTensorRTAttrs(Argument *argument) {
4545
std::unordered_set<std::string> teller_set(
4646
{"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid",
4747
"depthwise_conv2d", "batch_norm", "concat", "tanh", "pad",
48-
"elementwise_add", "dropout", "split"});
48+
"elementwise_add", "dropout", "split", "prelu", "conv2d_transpose"});
4949
if (!node->IsOp()) return false;
5050

5151
if (teller_set.count(node->Op()->Type())) {

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,4 +549,6 @@ USE_TRT_CONVERTER(concat);
549549
USE_TRT_CONVERTER(dropout);
550550
USE_TRT_CONVERTER(pad);
551551
USE_TRT_CONVERTER(split);
552+
USE_TRT_CONVERTER(prelu);
553+
USE_TRT_CONVERTER(conv2d_transpose);
552554
#endif

paddle/fluid/inference/tensorrt/convert/CMakeLists.txt

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
nv_library(tensorrt_converter
33
SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
44
batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc
5-
pad_op.cc split_op.cc
5+
pad_op.cc split_op.cc prelu_op.cc
66
DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
77

88
nv_test(test_op_converter SRCS test_op_converter.cc DEPS
@@ -16,7 +16,7 @@ nv_test(test_trt_fc_op SRCS test_fc_op.cc fc_op.cc
1616
nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
1717
DEPS ${FLUID_CORE_MODULES} tensorrt_engine activation_op SERIAL)
1818
nv_test(test_trt_conv_op SRCS test_conv2d_op.cc conv2d_op.cc
19-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine conv_op SERIAL)
19+
DEPS ${FLUID_CORE_MODULES} tensorrt_engine conv_op conv_transpose_op SERIAL)
2020
nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc
2121
DEPS ${FLUID_CORE_MODULES} tensorrt_engine pool_op SERIAL)
2222
nv_test(test_trt_elementwise_op SRCS test_elementwise_op.cc elementwise_op.cc
@@ -33,4 +33,7 @@ nv_test(test_trt_pad_op SRCS test_pad_op.cc pad_op.cc
3333
DEPS ${FLUID_CORE_MODULES} tensorrt_engine pad_op SERIAL)
3434
nv_test(test_trt_split_op SRCS test_split_op.cc split_op.cc
3535
DEPS ${FLUID_CORE_MODULES} tensorrt_engine tensorrt_plugin
36-
split_op concat_op SERIAL)
36+
split_op concat_op SERIAL)
37+
nv_test(test_trt_prelu_op SRCS test_prelu_op.cc prelu_op.cc
38+
DEPS ${FLUID_CORE_MODULES} tensorrt_engine tensorrt_plugin
39+
prelu_op SERIAL)

paddle/fluid/inference/tensorrt/convert/conv2d_op.cc

Lines changed: 118 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -18,92 +18,139 @@ namespace paddle {
1818
namespace inference {
1919
namespace tensorrt {
2020

21-
bool to_skip_merging_optimize(TensorRTEngine* engine_,
21+
bool to_skip_merging_optimize(TensorRTEngine* engine,
2222
const std::vector<int>& filters,
2323
const std::vector<int>& strides,
2424
const std::vector<int>& paddings,
2525
std::string input_name) {
26-
if (engine_->itensor_quote_num[input_name] > 0) {
26+
if (engine->itensor_quote_num[input_name] > 0) {
2727
return true;
2828
}
2929
if (filters[0] == 1 && filters[1] == 1 && strides[0] == 1 &&
3030
strides[1] == 1 && paddings[0] == 0 && paddings[1] == 0)
31-
engine_->itensor_quote_num[input_name] += 1;
31+
engine->itensor_quote_num[input_name] += 1;
3232

3333
return false;
3434
}
3535

36+
template <typename RegistFunc, typename SetDilationFunc>
37+
void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
38+
const framework::Scope& scope, bool test_mode,
39+
RegistFunc fadd_layer, SetDilationFunc fset_dilation,
40+
const std::string& name) {
41+
VLOG(3) << "convert a fluid " << name << " op to tensorrt layer without bias";
42+
43+
framework::OpDesc op_desc(op, nullptr);
44+
PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1);
45+
PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1); // Y is a weight
46+
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1);
47+
48+
PADDLE_ENFORCE(engine != nullptr);
49+
auto* X = engine->GetITensor(op_desc.Input("Input").front());
50+
51+
// Declare weights
52+
auto* Y_v = scope.FindVar(op_desc.Input("Filter").front());
53+
PADDLE_ENFORCE_NOT_NULL(Y_v);
54+
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
55+
56+
platform::CPUPlace cpu_place;
57+
std::unique_ptr<framework::LoDTensor> weight_tensor(
58+
new framework::LoDTensor());
59+
weight_tensor->Resize(Y_t->dims());
60+
TensorCopySync((*Y_t), cpu_place, weight_tensor.get());
61+
62+
auto* weight_data = weight_tensor->mutable_data<float>(platform::CPUPlace());
63+
64+
PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
65+
const int n_output = weight_tensor->dims()[0];
66+
const int n_input = weight_tensor->dims()[1];
67+
const int filter_h = weight_tensor->dims()[2];
68+
const int filter_w = weight_tensor->dims()[3];
69+
const int groups = boost::get<int>(op_desc.GetAttr("groups"));
70+
const std::vector<int> dilations =
71+
boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
72+
const std::vector<int> strides =
73+
boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
74+
const std::vector<int> paddings =
75+
boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
76+
77+
nvinfer1::DimsHW nv_ksize(filter_h, filter_w);
78+
nvinfer1::DimsHW nv_dilations(dilations[0], dilations[1]);
79+
nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
80+
nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
81+
82+
TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
83+
static_cast<void*>(weight_data),
84+
static_cast<size_t>(weight_tensor->numel())};
85+
86+
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
87+
auto* layer = fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output, n_input,
88+
nv_ksize, weight, bias);
89+
PADDLE_ENFORCE(layer != nullptr);
90+
layer->setStride(nv_strides);
91+
layer->setPadding(nv_paddings);
92+
layer->setNbGroups(groups);
93+
// set dilations
94+
fset_dilation(layer, nv_dilations);
95+
96+
auto output_name = op_desc.Output("Output").front();
97+
layer->setName((name + " (Output: " + output_name + ")").c_str());
98+
engine->weight_map[op_desc.Input("Filter").front()] =
99+
std::move(weight_tensor);
100+
layer->getOutput(0)->setName(output_name.c_str());
101+
engine->SetITensor(output_name, layer->getOutput(0));
102+
103+
if (test_mode ||
104+
to_skip_merging_optimize(engine, {filter_h, filter_w}, strides, paddings,
105+
op_desc.Input("Input").front())) {
106+
engine->DeclareOutput(output_name);
107+
}
108+
}
109+
36110
class Conv2dOpConverter : public OpConverter {
37111
public:
38112
void operator()(const framework::proto::OpDesc& op,
39113
const framework::Scope& scope, bool test_mode) override {
40-
VLOG(3) << "convert a fluid conv2d op to tensorrt conv layer without bias";
41-
42-
framework::OpDesc op_desc(op, nullptr);
43-
PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1);
44-
PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1); // Y is a weight
45-
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1);
46-
47-
auto* X = engine_->GetITensor(op_desc.Input("Input").front());
48-
49-
// Declare weights
50-
auto* Y_v = scope.FindVar(op_desc.Input("Filter").front());
51-
PADDLE_ENFORCE_NOT_NULL(Y_v);
52-
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
53-
54-
platform::CPUPlace cpu_place;
55-
std::unique_ptr<framework::LoDTensor> weight_tensor(
56-
new framework::LoDTensor());
57-
weight_tensor->Resize(Y_t->dims());
58-
TensorCopySync((*Y_t), cpu_place, weight_tensor.get());
59-
60-
auto* weight_data =
61-
weight_tensor->mutable_data<float>(platform::CPUPlace());
62-
63-
PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
64-
const int n_output = weight_tensor->dims()[0];
65-
const int filter_h = weight_tensor->dims()[2];
66-
const int filter_w = weight_tensor->dims()[3];
67-
68-
const int groups = boost::get<int>(op_desc.GetAttr("groups"));
69-
const std::vector<int> dilations =
70-
boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
71-
const std::vector<int> strides =
72-
boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
73-
const std::vector<int> paddings =
74-
boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
75-
76-
nvinfer1::DimsHW nv_ksize(filter_h, filter_w);
77-
nvinfer1::DimsHW nv_dilations(dilations[0], dilations[1]);
78-
nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
79-
nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
80-
81-
TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
82-
static_cast<void*>(weight_data),
83-
weight_tensor->memory_size() / sizeof(float)};
84-
85-
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
86-
auto* layer = TRT_ENGINE_ADD_LAYER(
87-
engine_, Convolution, *const_cast<nvinfer1::ITensor*>(X), n_output,
88-
nv_ksize, weight.get(), bias.get());
89-
PADDLE_ENFORCE(layer != nullptr);
90-
layer->setStride(nv_strides);
91-
layer->setPadding(nv_paddings);
92-
layer->setDilation(nv_dilations);
93-
layer->setNbGroups(groups);
94-
95-
auto output_name = op_desc.Output("Output").front();
96-
layer->setName(("conv2d (Output: " + output_name + ")").c_str());
97-
engine_->weight_map[op_desc.Input("Filter").front()] =
98-
std::move(weight_tensor);
99-
layer->getOutput(0)->setName(output_name.c_str());
100-
engine_->SetITensor(output_name, layer->getOutput(0));
101-
102-
if (test_mode ||
103-
to_skip_merging_optimize(engine_, {filter_h, filter_w}, strides,
104-
paddings, op_desc.Input("Input").front())) {
105-
engine_->DeclareOutput(output_name);
106-
}
114+
ConvertConv2d(
115+
engine_, op, scope, test_mode,
116+
[&](nvinfer1::ITensor* inputs, int n_output, /* Conv output maps */
117+
int n_input, /* Conv input maps */
118+
nvinfer1::DimsHW& ksize, TensorRTEngine::Weight& weight,
119+
TensorRTEngine::Weight& bias) -> nvinfer1::IConvolutionLayer* {
120+
auto* layer =
121+
TRT_ENGINE_ADD_LAYER(engine_, Convolution, *inputs, n_output,
122+
ksize, weight.get(), bias.get());
123+
return layer;
124+
},
125+
[](nvinfer1::IConvolutionLayer* layer, nvinfer1::DimsHW& dilations) {
126+
layer->setDilation(dilations);
127+
},
128+
"conv2d");
129+
}
130+
};
131+
132+
class Deconv2dOpConverter : public OpConverter {
133+
public:
134+
void operator()(const framework::proto::OpDesc& op,
135+
const framework::Scope& scope, bool test_mode) override {
136+
ConvertConv2d(
137+
engine_, op, scope, test_mode,
138+
[&](nvinfer1::ITensor* inputs, int n_output, /* Deconv input maps */
139+
int n_input, /* Deconv output maps */
140+
nvinfer1::DimsHW& ksize, TensorRTEngine::Weight& weight,
141+
TensorRTEngine::Weight& bias) -> nvinfer1::IDeconvolutionLayer* {
142+
auto* layer =
143+
TRT_ENGINE_ADD_LAYER(engine_, Deconvolution, *inputs, n_input,
144+
ksize, weight.get(), bias.get());
145+
return layer;
146+
},
147+
[](nvinfer1::IDeconvolutionLayer* layer, nvinfer1::DimsHW& dilations) {
148+
PADDLE_ENFORCE(
149+
dilations.d[0] == 1 && dilations.d[1] == 1,
150+
"Dilations must be (1, 1) for tensorRT, but given (%d, %d)",
151+
dilations.d[0], dilations.d[1]);
152+
},
153+
"conv2d_transpose");
107154
}
108155
};
109156

@@ -112,3 +159,4 @@ class Conv2dOpConverter : public OpConverter {
112159
} // namespace paddle
113160

114161
REGISTER_TRT_OP_CONVERTER(conv2d, Conv2dOpConverter);
162+
REGISTER_TRT_OP_CONVERTER(conv2d_transpose, Deconv2dOpConverter);

paddle/fluid/inference/tensorrt/convert/elementwise_op.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ class ElementwiseWeightOpConverter : public OpConverter {
3434

3535
auto* X = engine_->GetITensor(op_desc.Input("X").front());
3636
nvinfer1::Dims dims_x = X->getDimensions();
37-
PADDLE_ENFORCE(dims_x.nbDims >= 3);
37+
PADDLE_ENFORCE(dims_x.nbDims >= 3, "x dims experts 3, but %d is given.",
38+
dims_x.nbDims);
3839

3940
auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
4041
PADDLE_ENFORCE_NOT_NULL(Y_v);
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
16+
#include "paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h"
17+
18+
namespace paddle {
19+
namespace inference {
20+
namespace tensorrt {
21+
22+
/*
23+
* PRelu converter from fluid to tensorRT.
24+
*/
25+
class PReluOpConverter : public OpConverter {
26+
public:
27+
void operator()(const framework::proto::OpDesc& op,
28+
const framework::Scope& scope, bool test_mode) override {
29+
VLOG(4) << "convert fluid prelu op to tensorrt prelu layer";
30+
31+
framework::OpDesc op_desc(op, nullptr);
32+
// Declare inputs
33+
int input_num = op_desc.Input("X").size();
34+
PADDLE_ENFORCE(input_num == 1);
35+
auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
36+
// Get output
37+
size_t output_num = op_desc.Output("Out").size();
38+
PADDLE_ENFORCE(output_num == 1);
39+
// Get attrs
40+
std::string mode = boost::get<std::string>(op_desc.GetAttr("mode"));
41+
//
42+
auto* alpha_var = scope.FindVar(op_desc.Input("Alpha")[0]);
43+
PADDLE_ENFORCE_NOT_NULL(alpha_var);
44+
auto* alpha_tensor = alpha_var->GetMutable<framework::LoDTensor>();
45+
46+
platform::CUDAPlace place;
47+
std::unique_ptr<framework::LoDTensor> alpha_tensor_device(
48+
new framework::LoDTensor());
49+
alpha_tensor_device->Resize(alpha_tensor->dims());
50+
TensorCopySync(*alpha_tensor, place, alpha_tensor_device.get());
51+
float* alpha_data = alpha_tensor_device->mutable_data<float>(place);
52+
53+
// Transform alpha to TensorRTEngine::Weight
54+
TensorRTEngine::Weight alpha_rt(nvinfer1::DataType::kFLOAT,
55+
static_cast<void*>(alpha_data),
56+
alpha_tensor_device->numel());
57+
PReluPlugin* plugin = new PReluPlugin(alpha_rt, mode);
58+
nvinfer1::IPluginLayer* layer =
59+
engine_->AddPlugin(&input, input_num, plugin);
60+
// keep alpha tensor to avoid release it's memory
61+
engine_->weight_map[op_desc.Input("Alpha")[0]] =
62+
std::move(alpha_tensor_device);
63+
64+
std::string layer_name = "prelu (Output: ";
65+
auto output_name = op_desc.Output("Out")[0];
66+
layer->getOutput(0)->setName(output_name.c_str());
67+
engine_->SetITensor(output_name, layer->getOutput(0));
68+
layer_name += output_name;
69+
if (test_mode) {
70+
engine_->DeclareOutput(output_name);
71+
}
72+
layer->setName((layer_name + ")").c_str());
73+
}
74+
};
75+
76+
} // namespace tensorrt
77+
} // namespace inference
78+
} // namespace paddle
79+
80+
REGISTER_TRT_OP_CONVERTER(prelu, PReluOpConverter);

paddle/fluid/inference/tensorrt/convert/split_op.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class SplitOpConverter : public OpConverter {
2626
public:
2727
void operator()(const framework::proto::OpDesc& op,
2828
const framework::Scope& scope, bool test_mode) override {
29-
VLOG(40) << "convert a fluid split op to tensorrt split layer";
29+
VLOG(4) << "convert a fluid split op to tensorrt split layer";
3030

3131
framework::OpDesc op_desc(op, nullptr);
3232
// Declare inputs

0 commit comments

Comments (0)