
Commit 4794d9c

use fast RunPreparedContext for inference

1 parent 515a756

4 files changed: 92 additions(+), 50 deletions(−)

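In short, the commit stops rebuilding string-keyed feed/fetch target maps on every Run() call. Feed tensors are now written straight into the scope's "feed" holder, the prepared context runs through the lighter RunPreparedContext overload (no map arguments), and results are read back from the "fetch" holder; PrepareFeedFetch() caches the feed/fetch op descriptors once at Init(). A condensed before/after sketch of the call site, assembled from the hunks below:

    // Before: feed/fetch maps rebuilt and passed to the executor per call.
    executor_->RunPreparedContext(ctx_.get(), scope, &feed_targets,
                                  &fetch_targets, false, false);

    // After: tensors are staged in the scope by index (the op's "col"
    // attribute), so the executor call carries no feed/fetch arguments.
    framework::SetFeedVariable(scope, input, "feed", idx);    // per input
    executor_->RunPreparedContext(ctx_.get(), scope, false, false);
    framework::LoDTensor &out =
        framework::GetFetchVariable(*scope, "fetch", idx);    // per output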

paddle/fluid/inference/api/api_impl.cc (57 additions, 40 deletions)

@@ -21,6 +21,7 @@ limitations under the License. */
 #include <utility>
 #include <vector>
 
+#include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/inference/api/api_impl.h"
 #include "paddle/fluid/platform/profiler.h"
 
@@ -57,6 +58,27 @@ std::string num2str(T a) {
 }
 }  // namespace
 
+void NativePaddlePredictor::PrepareFeedFetch() {
+  for (auto *op : inference_program_->Block(0).AllOps()) {
+    if (op->Type() == "feed") {
+      int idx = boost::get<int>(op->GetAttr("col"));
+      if (feeds_.size() <= idx) {
+        feeds_.resize(idx + 1);
+      }
+      feeds_[idx] = op;
+      feed_names_[op->Output("Out")[0]] = idx;
+      LOG(ERROR) << "feed " << idx << " " << op->Output("Out")[0];
+    } else if (op->Type() == "fetch") {
+      int idx = boost::get<int>(op->GetAttr("col"));
+      if (fetchs_.size() <= idx) {
+        fetchs_.resize(idx + 1);
+      }
+      fetchs_[idx] = op;
+      LOG(ERROR) << "fetch " << idx << " " << op->Input("X")[0];
+    }
+  }
+}
+
 bool NativePaddlePredictor::Init(
     std::shared_ptr<framework::Scope> parent_scope) {
   VLOG(3) << "Predictor::init()";
@@ -108,8 +130,7 @@ bool NativePaddlePredictor::Init(
       sub_scope_ ? sub_scope_ : scope_.get(), 0);
 
   // Get the feed_target_names and fetch_target_names
-  feed_target_names_ = inference_program_->GetFeedTargetNames();
-  fetch_target_names_ = inference_program_->GetFetchTargetNames();
+  PrepareFeedFetch();
   return true;
 }
 
@@ -130,36 +151,21 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
   Timer timer;
   timer.tic();
   // set feed variable
-  std::map<std::string, const framework::LoDTensor *> feed_targets;
   std::vector<framework::LoDTensor> feeds;
-  if (!SetFeed(inputs, &feeds)) {
+  framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
+  if (!SetFeed(inputs, scope)) {
     LOG(ERROR) << "fail to set feed";
     return false;
   }
-  for (size_t i = 0; i < feed_target_names_.size(); ++i) {
-    if (config_.specify_input_name) {
-      feed_targets[inputs[i].name] = &feeds[i];
-    } else {
-      feed_targets[feed_target_names_[i]] = &feeds[i];
-    }
-  }
-  // get fetch variable
-  std::map<std::string, framework::LoDTensor *> fetch_targets;
-  std::vector<framework::LoDTensor> fetchs;
-  fetchs.resize(fetch_target_names_.size());
-  for (size_t i = 0; i < fetch_target_names_.size(); ++i) {
-    fetch_targets[fetch_target_names_[i]] = &fetchs[i];
-  }
   // Run the inference program
   // if share variables, we need not create variables
   VLOG(4) << "Run prepared context";
-  executor_->RunPreparedContext(
-      ctx_.get(), sub_scope_ != nullptr ? sub_scope_ : scope_.get(),
-      &feed_targets, &fetch_targets,
-      false, /* don't create local scope each time */
-      false /* don't create variable each time */);
+  executor_->RunPreparedContext(ctx_.get(), scope,
+                                false, /* don't create local scope each time */
+                                false /* don't create variable each time */);
   VLOG(4) << "Finish prepared context";
-  if (!GetFetch(fetchs, output_data)) {
+  // get fetch variable
+  if (!GetFetch(output_data, scope)) {
     LOG(ERROR) << "fail to get fetches";
     return false;
   }
@@ -180,13 +186,13 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
 }
 
 bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
-                                    std::vector<framework::LoDTensor> *feeds) {
+                                    framework::Scope *scope) {
   VLOG(3) << "Predictor::set_feed";
-  if (inputs.size() != feed_target_names_.size()) {
+  if (inputs.size() != feeds_.size()) {
     LOG(ERROR) << "wrong feed input size.";
     return false;
   }
-  for (size_t i = 0; i < feed_target_names_.size(); ++i) {
+  for (size_t i = 0; i < inputs.size(); ++i) {
     framework::LoDTensor input;
     framework::DDim ddim = framework::make_ddim(inputs[i].shape);
     void *input_ptr;
@@ -208,29 +214,40 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
       lod.emplace_back(level);
     }
     input.set_lod(lod);
-
-    feeds->push_back(input);
+    int idx = -1;
+    if (config_.specify_input_name) {
+      idx =
+          boost::get<int>(feeds_[feed_names_[inputs[i].name]]->GetAttr("col"));
+    } else {
+      idx = boost::get<int>(feeds_[i]->GetAttr("col"));
+    }
+    framework::SetFeedVariable(scope, input, "feed", idx);
   }
   return true;
 }
 
-bool NativePaddlePredictor::GetFetch(
-    const std::vector<framework::LoDTensor> &fetchs,
-    std::vector<PaddleTensor> *outputs) {
+bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
+                                     framework::Scope *scope) {
   VLOG(3) << "Predictor::get_fetch";
-  outputs->resize(fetchs.size());
-  for (size_t i = 0; i < fetchs.size(); ++i) {
+  outputs->resize(fetchs_.size());
+  for (size_t i = 0; i < fetchs_.size(); ++i) {
+    std::string fetch_target_name = fetchs_[i]->Input("X")[0];
+    int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
+    PADDLE_ENFORCE(idx == i);
+    framework::LoDTensor &output =
+        framework::GetFetchVariable(*scope, "fetch", idx);
     // TODO(panyx0718): Support fetch of other types.
-    if (fetchs[i].type() != typeid(float)) {
+    if (output.type() != typeid(float)) {
       LOG(ERROR) << "only support fetching float now.";
       return false;
     }
+
     std::vector<int> shape;
-    auto dims_i = fetchs[i].dims();
-    auto lod = fetchs[i].lod();
-    const float *output_ptr = fetchs[i].data<float>();
+    auto dims_i = output.dims();
+    auto lod = output.lod();
+    const float *output_ptr = output.data<float>();
     // const int64_t* output_ptr = fetchs[i].data<int64_t>();
-    auto num = fetchs[i].numel();
+    auto num = output.numel();
     std::vector<float> data;
     if (0 == lod.size()) {
       std::copy(output_ptr, output_ptr + num, std::back_inserter(data));
@@ -275,7 +292,7 @@ bool NativePaddlePredictor::GetFetch(
     }
     std::memcpy(buffer.data(), data.data(), buffer.length());
     // copy LoD
-    for (const auto &level : fetchs[i].lod()) {
+    for (const auto &level : output.lod()) {
      outputs->at(i).lod.emplace_back(level);
    }
    outputs->at(i).dtype = PaddleDType::FLOAT32;
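For intuition about the holder indexing that SetFeedVariable/GetFetchVariable rely on above: a holder behaves like a growable, index-addressed list of tensors stored under a named variable ("feed" or "fetch") in the scope, and each feed/fetch op's "col" attribute selects the slot. A toy model of that contract, illustrative only; ToyScope/ToySetFeed/ToyGetFetch are invented names for this sketch, not Paddle APIs:

    #include <string>
    #include <unordered_map>
    #include <vector>

    struct Tensor { std::vector<float> data; };

    // Stand-in for a scope that owns named, index-addressed tensor lists.
    struct ToyScope {
      std::unordered_map<std::string, std::vector<Tensor>> holders;
    };

    // Analogous to framework::SetFeedVariable: grow the named holder as
    // needed, then write the tensor into slot `col`.
    void ToySetFeed(ToyScope* scope, const Tensor& t,
                    const std::string& holder, size_t col) {
      auto& list = scope->holders[holder];
      if (list.size() <= col) list.resize(col + 1);
      list[col] = t;
    }

    // Analogous to framework::GetFetchVariable: read slot `col`.
    const Tensor& ToyGetFetch(const ToyScope& scope,
                              const std::string& holder, size_t col) {
      return scope.holders.at(holder)[col];
    }

    int main() {
      ToyScope scope;
      ToySetFeed(&scope, Tensor{{1.f, 2.f}}, "feed", 0);
      return ToyGetFetch(scope, "feed", 0).data.size() == 2 ? 0 : 1;
    }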

paddle/fluid/inference/api/api_impl.h (9 additions, 5 deletions)

@@ -15,6 +15,7 @@
 #pragma once
 
 #include <glog/logging.h>
+#include <map>
 #include <memory>
 #include <string>
 #include <vector>
@@ -47,18 +48,21 @@ class NativePaddlePredictor : public PaddlePredictor {
 
  protected:
   bool SetFeed(const std::vector<PaddleTensor> &input_datas,
-               std::vector<framework::LoDTensor> *feeds);
-  bool GetFetch(const std::vector<framework::LoDTensor> &fetchs,
-                std::vector<PaddleTensor> *output_data);
+               framework::Scope *scope);
+  bool GetFetch(std::vector<PaddleTensor> *output_data,
+                framework::Scope *scope);
+
+  void PrepareFeedFetch();
 
   NativeConfig config_;
   platform::Place place_;
   std::unique_ptr<framework::Executor> executor_;
   std::shared_ptr<framework::Scope> scope_;
   std::unique_ptr<framework::ExecutorPrepareContext> ctx_;
   std::unique_ptr<framework::ProgramDesc> inference_program_;
-  std::vector<std::string> feed_target_names_;
-  std::vector<std::string> fetch_target_names_;
+  std::vector<framework::OpDesc *> feeds_;
+  std::map<std::string, size_t> feed_names_;
+  std::vector<framework::OpDesc *> fetchs_;
   // Do not use unique_ptr, use parent scope to delete
   framework::Scope *sub_scope_{nullptr};
 };
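Design note: replacing the feed_target_names_/fetch_target_names_ string vectors with cached framework::OpDesc pointers (feeds_, fetchs_) plus the feed_names_ index map moves all op scanning into the one-time PrepareFeedFetch() pass. Each subsequent Run() resolves an input to its feed slot either positionally or, when config_.specify_input_name is set, through a single feed_names_ lookup, rather than rebuilding name-to-tensor maps on every call.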

paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc (1 addition, 3 deletions)

@@ -74,10 +74,8 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
     VLOG(5) << "to create variables";
     executor_->CreateVariables(*inference_program_,
                                sub_scope_ ? sub_scope_ : scope_.get(), 0);
-
     // Get the feed_target_names and fetch_target_names
-    feed_target_names_ = inference_program_->GetFeedTargetNames();
-    fetch_target_names_ = inference_program_->GetFetchTargetNames();
+    PrepareFeedFetch();
     return true;
   }
 

paddle/fluid/inference/tests/book/test_inference_nlp.cc (25 additions, 2 deletions)

@@ -21,6 +21,8 @@ limitations under the License. */
 #include "paddle/fluid/inference/tests/test_helper.h"
 #include "paddle/fluid/platform/cpu_helper.h"
 
+#include "paddle/fluid/framework/feed_fetch_method.h"
+
 DEFINE_string(model_path, "", "Directory of the inference model.");
 DEFINE_string(data_file, "", "File of input index data.");
 DEFINE_int32(repeat, 100, "Running the inference program repeat times");
@@ -124,14 +126,35 @@ void ThreadRunInfer(
   std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
   PADDLE_ENFORCE_EQ(feed_target_names.size(), 1UL);
 
+  // map the data of feed_targets to feed_holder
+  for (auto* op : inference_program->Block(0).AllOps()) {
+    if (op->Type() == "feed") {
+      std::string feed_target_name = op->Output("Out")[0];
+      int idx = boost::get<int>(op->GetAttr("col"));
+      paddle::framework::SetFeedVariable(scope, *feed_targets[feed_target_name],
+                                         "feed", idx);
+    }
+  }
+
   auto& inputs = jobs[tid];
   auto start_ms = GetCurrentMs();
   for (size_t i = 0; i < inputs.size(); ++i) {
     feed_targets[feed_target_names[0]] = inputs[i];
-    executor.RunPreparedContext(ctx.get(), &sub_scope, &feed_targets,
-                                &fetch_targets, false /*create_local_scope*/);
+    executor.RunPreparedContext(ctx.get(), &sub_scope,
+                                false /*create_local_scope*/);
   }
   auto stop_ms = GetCurrentMs();
+
+  // obtain the data of fetch_targets from fetch_holder
+  for (auto* op : inference_program->Block(0).AllOps()) {
+    if (op->Type() == "fetch") {
+      std::string fetch_target_name = op->Input("X")[0];
+      int idx = boost::get<int>(op->GetAttr("col"));
+      *fetch_targets[fetch_target_name] =
+          paddle::framework::GetFetchVariable(*scope, "fetch", idx);
+    }
+  }
+
   scope->DeleteScope(&sub_scope);
   LOG(INFO) << "Tid: " << tid << ", process " << inputs.size()
             << " samples, avg time per sample: "
