1. Support multi-batch unit tests 2. Support pool op

NHZlX · NHZlX · commit 01566fb61b66 · 2018-07-25T06:25:03.000Z
diff --git a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
@@ -43,6 +43,8 @@ class Pool2dOpConverter : public OpConverter {
     const nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
     const nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
 
+    PADDLE_ENFORCE_EQ(input1->getDimensions().nbDims, 3UL);
+
     nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kMAX;
     if (pool_type == "max") {
       pool_t = nvinfer1::PoolingType::kMAX;
diff --git a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
@@ -23,7 +23,8 @@ namespace tensorrt {
 TEST(ReluOpConverter, main) {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  TRTConvertValidation validator(10, parameters, scope, 1000);
+  int runtime_batch = 3;
+  TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
   validator.DeclInputVar("relu-X", nvinfer1::Dims2(10, 6));
   validator.DeclOutputVar("relu-Out", nvinfer1::Dims2(10, 6));
 
@@ -37,7 +38,7 @@ TEST(ReluOpConverter, main) {
   validator.SetOp(*desc.Proto());
   LOG(INFO) << "execute";
 
-  validator.Execute(1);
+  validator.Execute(runtime_batch);
 }
 
 }  // namespace tensorrt
diff --git a/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc b/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
@@ -23,10 +23,10 @@ namespace tensorrt {
 TEST(fc_op, test) {
   std::unordered_set<std::string> parameters({"mul-Y"});
   framework::Scope scope;
-  TRTConvertValidation validator(10, parameters, scope, 1000);
-  validator.DeclInputVar("mul-X", nvinfer1::Dims4(1, 10, 1, 1));
+  int runtime_batch = 2;
+  TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
+  validator.DeclInputVar("mul-X", nvinfer1::Dims3(10, 1, 1));
   validator.DeclParamVar("mul-Y", nvinfer1::Dims2(10, 2));
-  // validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8, 2));
   validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(1, 2));
 
   // Prepare Op description
@@ -38,7 +38,7 @@ TEST(fc_op, test) {
 
   validator.SetOp(*desc.Proto());
 
-  validator.Execute(1);
+  validator.Execute(runtime_batch);
 }
 
 }  // namespace tensorrt
diff --git a/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc b/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
@@ -23,7 +23,8 @@ namespace tensorrt {
 TEST(MulOpConverter, main) {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  TRTConvertValidation validator(10, parameters, scope, 1000);
+  int runtime_batch = 0;
+  TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
   validator.DeclInputVar("mul-X", nvinfer1::Dims2(10, 6));
   validator.DeclInputVar("mul-Y", nvinfer1::Dims2(6, 10));
   validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(10, 10));
diff --git a/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc
@@ -23,9 +23,14 @@ namespace tensorrt {
 TEST(Pool2dOpConverter, main) {
   framework::Scope scope;
   std::unordered_set<std::string> parameters;
-  TRTConvertValidation validator(10, parameters, scope, 1000);
-  validator.DeclInputVar("pool2d-X", nvinfer1::Dims4(10, 3, 2, 2));
-  validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims4(10, 3, 1, 1));
+  int runtime_batch = 3;
+  TRTConvertValidation validator(5, parameters, scope, 1 << 15, runtime_batch);
+
+  // We have already set the runtime batchsize, so the
+  // Dims should not contain the batch size.
+  // The ITensor's Dims of input and output should be C * H * W.
+  validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 4, 4));
+  validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 2, 2));
 
   // Prepare Op description
   framework::OpDesc desc;
@@ -34,26 +39,20 @@ TEST(Pool2dOpConverter, main) {
   desc.SetOutput("Out", {"pool2d-Out"});
 
   std::vector<int> ksize({2, 2});
-  std::vector<int> strides({1, 1});
+  std::vector<int> strides({2, 2});
   std::vector<int> paddings({0, 0});
   std::string pooling_t = "max";
 
   desc.SetAttr("pooling_type", pooling_t);
   desc.SetAttr("ksize", ksize);
   desc.SetAttr("strides", strides);
   desc.SetAttr("paddings", paddings);
-  // std::string temp = "";
-  // (*desc.Proto()).SerializeToString(&temp);
-
-  // std::cout << temp << std::endl;
-  // std::ofstream f("__temp__", std::ios::out);
-  // f << temp;
 
   LOG(INFO) << "set OP";
   validator.SetOp(*desc.Proto());
   LOG(INFO) << "execute";
 
-  validator.Execute(10);
+  validator.Execute(runtime_batch);
 }
 
 }  // namespace tensorrt
diff --git a/paddle/fluid/inference/tensorrt/convert/ut_helper.h b/paddle/fluid/inference/tensorrt/convert/ut_helper.h
@@ -63,13 +63,15 @@ class TRTConvertValidation {
  public:
   TRTConvertValidation() = delete;
 
-  TRTConvertValidation(int batch_size,
+  TRTConvertValidation(int max_batch_size,
                        const std::unordered_set<std::string>& parameters,
                        framework::Scope& scope,  // NOLINT
-                       int workspace_size = 1 << 10)
-      : parameters_(parameters), scope_(scope) {
+                       int workspace_size = 1 << 10, int runtime_batch_size = 1)
+      : parameters_(parameters),
+        scope_(scope),
+        runtime_batch_size_(runtime_batch_size) {
     // create engine.
-    engine_.reset(new TensorRTEngine(batch_size, workspace_size, &stream_));
+    engine_.reset(new TensorRTEngine(max_batch_size, workspace_size, &stream_));
     engine_->InitNetwork();
 
     PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
@@ -84,20 +86,28 @@ class TRTConvertValidation {
 
   // Declare a parameter varaible in the scope.
   void DeclParamVar(const std::string& name, const nvinfer1::Dims& dims) {
-    DeclVar(name, dims);
+    DeclVar(name, dims, true);
   }
 
   void DeclOutputVar(const std::string& name, const nvinfer1::Dims& dims) {
     DeclVar(name, dims);
   }
 
   // Declare a variable in a fluid Scope.
-  void DeclVar(const std::string& name, const nvinfer1::Dims& dims) {
+  void DeclVar(const std::string& name, const nvinfer1::Dims& dims,
+               bool is_param = false) {
     platform::CPUPlace place;
     platform::CPUDeviceContext ctx(place);
 
     // Init Fluid tensor.
     std::vector<int> dim_vec(dims.d, dims.d + dims.nbDims);
+    // An ITensor's shape carries no batch size, but the fluid tensor's
+    // shape
+    // must include it. If the variable is not a parameter and the runtime
+    // batch size is greater than 0,
+    // insert the batch size at the front of dim_vec.
+    if (is_param != true && runtime_batch_size_ > 0)
+      dim_vec.insert(dim_vec.begin(), runtime_batch_size_);
     auto* x = scope_.Var(name);
     auto* x_tensor = x->GetMutable<framework::LoDTensor>();
     x_tensor->Resize(framework::make_ddim(dim_vec));
@@ -167,6 +177,10 @@ class TRTConvertValidation {
   std::unique_ptr<framework::OpDesc> op_desc_;
   const std::unordered_set<std::string>& parameters_;
   framework::Scope& scope_;
+  //  The runtime batch size used when running the test.
+  //  If the value is greater than 0, it is inserted as the first
+  //  dimension of each non-parameter fluid tensor's shape.
+  int runtime_batch_size_;
 };
 
 }  // namespace tensorrt
diff --git a/paddle/fluid/inference/tensorrt/test_engine.cc b/paddle/fluid/inference/tensorrt/test_engine.cc
@@ -113,7 +113,7 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
   ASSERT_EQ(y_cpu[1], 14.5);
 }
 
-TEST_F(TensorRTEngineTest, test_conv2d_temp) {
+TEST_F(TensorRTEngineTest, test_conv2d) {
   // Weight in CPU memory.
   float raw_weight[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
   float raw_bias[1] = {0};
@@ -146,6 +146,37 @@ TEST_F(TensorRTEngineTest, test_conv2d_temp) {
   ASSERT_EQ(y_cpu[1], 6.0);
 }
 
+TEST_F(TensorRTEngineTest, test_pool2d) {
+  // Average pooling (2x2 window) over a Dims3{1, 2, 2} input; no weights.
+  auto* x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
+                                  nvinfer1::Dims3{1, 2, 2});
+
+  nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kAVERAGE;
+  auto* pool_layer =
+      TRT_ENGINE_ADD_LAYER(engine_, Pooling, *const_cast<nvinfer1::ITensor*>(x),
+                           pool_t, nvinfer1::DimsHW{2, 2});
+
+  PADDLE_ENFORCE(pool_layer != nullptr);
+  pool_layer->setStride(nvinfer1::DimsHW{1, 1});
+  pool_layer->setPadding(nvinfer1::DimsHW{0, 0});
+
+  engine_->DeclareOutput(pool_layer, 0, "y");
+  engine_->FreezeNetwork();
+  ASSERT_EQ(engine_->engine()->getNbBindings(), 2);
+  // Two groups of 4 floats (one per Execute(2) item); means are 2.0 and 5.0.
+  float x_v[8] = {1.0, 2.0, 5.0, 0.0, 2.0, 3.0, 5.0, 10.0};
+  engine_->SetInputFromCPU("x", reinterpret_cast<void*>(&x_v),
+                           8 * sizeof(float));
+  engine_->Execute(2);
+
+  LOG(INFO) << "to get output";
+  float y_cpu[2] = {0.0f, 0.0f};
+  engine_->GetOutputInCPU("y", &y_cpu[0], 2 * sizeof(float));
+  // Stack buffer above: nothing to free, even if an ASSERT returns early.
+  ASSERT_EQ(y_cpu[0], 2.0);
+  ASSERT_EQ(y_cpu[1], 5.0);
+}
+
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle