
Commit 5082642

feature/analysis to support sub-graph for TRT engine (#11538)

1 parent bc28cf6

39 files changed: +1015 −189 lines

paddle/contrib/inference/CMakeLists.txt

Lines changed: 9 additions & 1 deletion

@@ -18,7 +18,7 @@ if(APPLE)
 endif(APPLE)
 
 
-set(inference_deps paddle_inference_api paddle_fluid_api)
+set(inference_deps paddle_inference_api paddle_fluid_api paddle_inference_tensorrt_subgraph_engine)
 
 function(inference_api_test TARGET_NAME)
   if (WITH_TESTING)
@@ -50,6 +50,14 @@ cc_test(test_paddle_inference_api
 inference_api_test(test_paddle_inference_api_impl
     ARGS test_word2vec test_image_classification)
 
+if(WITH_GPU AND TENSORRT_FOUND)
+  cc_library(paddle_inference_tensorrt_subgraph_engine
+    SRCS paddle_inference_api_tensorrt_subgraph_engine.cc
+    DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api)
+
+  inference_api_test(test_paddle_inference_api_tensorrt_subgraph_engine ARGS test_word2vec)
+endif()
+
 if (WITH_ANAKIN AND WITH_TESTING) # only needed in CI
   # Because Anakin has no official library releases and its protobuf and cuda versions do not match Paddle's,
   # the anakin library will not be merged into our official inference library. To use the anakin prediction API, one needs to

paddle/contrib/inference/paddle_inference_api.h

Lines changed: 8 additions & 3 deletions

@@ -73,12 +73,12 @@ struct PaddleTensor {
 };
 
 enum class PaddleEngineKind {
-  kNative = 0,  // Use the native Fluid facility.
-  kAnakin,      // Use Anakin for inference.
+  kNative = 0,         // Use the native Fluid facility.
+  kAnakin,             // Use Anakin for inference.
+  kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
   // TODO(Superjomn) support following engines later.
   // kTensorRT,         // Use TensorRT for inference.
   // kAutoMixedAnakin,  // Automatically mix Fluid with Anakin.
-  // kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
 };
 
 /*
@@ -130,6 +130,11 @@ struct AnakinConfig : public PaddlePredictor::Config {
   int max_batch_size{-1};
 };
 
+struct TensorRTConfig : public NativeConfig {
+  // Determine whether a subgraph will be executed by TRT.
+  int min_subgraph_size{1};
+};
+
 // A factory to help create different predictors.
 //
 // FOR EXTENSION DEVELOPER:
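With this, a caller opts into the mixed engine by filling in a TensorRTConfig — which inherits the model, device, and GPU-memory fields from NativeConfig — and asking the factory for PaddleEngineKind::kAutoMixedTensorRT. A minimal sketch of that call sequence (the model path is illustrative; everything else mirrors the new test further down):

#include <memory>

#include "paddle/contrib/inference/paddle_inference_api.h"

std::unique_ptr<paddle::PaddlePredictor> MakeTrtPredictor() {
  paddle::TensorRTConfig config;
  config.model_dir = "/path/to/word2vec.inference.model";  // hypothetical path
  config.use_gpu = true;
  config.fraction_of_gpu_memory = 0.15;  // must lie in (0., 1.]
  config.device = 0;
  config.min_subgraph_size = 1;

  // The factory specialization added in this commit constructs and
  // initializes a TensorRTSubgraphPredictor; it returns nullptr on failure.
  return paddle::CreatePaddlePredictor<
      paddle::TensorRTConfig, paddle::PaddleEngineKind::kAutoMixedTensorRT>(
      config);
}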

paddle/contrib/inference/paddle_inference_api_impl.cc

Lines changed: 5 additions & 1 deletion

@@ -89,6 +89,7 @@ bool NativePaddlePredictor::Init(
     LOG(ERROR) << "fail to load inference model.";
     return false;
   }
+
   ctx_ = executor_->Prepare(*inference_program_, 0);
   executor_->CreateVariables(
       *inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0);
@@ -119,6 +120,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
     return false;
   }
   for (size_t i = 0; i < feed_target_names_.size(); ++i) {
+    VLOG(4) << "setting " << i << "-th target";
     feed_targets[feed_target_names_[i]] = &feeds[i];
   }
   // get fetch variable
@@ -130,14 +132,16 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
   }
   // Run the inference program
   // if variables are shared, we need not create them
+  VLOG(4) << "Run prepared context";
   executor_->RunPreparedContext(
       ctx_.get(),
       sub_scope_ != nullptr ? sub_scope_ : scope_.get(),
       &feed_targets,
       &fetch_targets,
       false /* don't create variable each time */);
+  VLOG(4) << "Finish prepared context";
   if (!GetFetch(fetchs, output_data)) {
-    LOG(ERROR) << "fail to get fetchs";
+    LOG(ERROR) << "fail to get fetches";
     return false;
   }
   VLOG(3) << "predict cost: " << timer.toc() << "ms";

paddle/contrib/inference/paddle_inference_api_impl.h

Lines changed: 1 addition & 1 deletion

@@ -44,7 +44,7 @@ class NativePaddlePredictor : public PaddlePredictor {
 
   ~NativePaddlePredictor() override;
 
- private:
+ protected:
   bool SetFeed(const std::vector<PaddleTensor> &input_datas,
                std::vector<framework::LoDTensor> *feeds);
   bool GetFetch(const std::vector<framework::LoDTensor> &fetchs,
paddle/contrib/inference/paddle_inference_api_tensorrt_subgraph_engine.cc

Lines changed: 126 additions & 0 deletions

@@ -0,0 +1,126 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/contrib/inference/paddle_inference_api.h"
+#include "paddle/contrib/inference/paddle_inference_api_impl.h"
+#include "paddle/fluid/inference/analysis/analyzer.h"
+#include "paddle/fluid/inference/utils/singleton.h"
+
+namespace paddle {
+
+using inference::analysis::Argument;
+using inference::Singleton;
+using inference::analysis::Analyzer;
+using framework::proto::ProgramDesc;
+
+class TensorRTSubgraphPredictor : public NativePaddlePredictor {
+ public:
+  explicit TensorRTSubgraphPredictor(const TensorRTConfig& config)
+      : NativePaddlePredictor(config), config_(config) {}
+
+  bool Init(const std::shared_ptr<framework::Scope>& parent_scope) {
+    VLOG(3) << "Predictor::init()";
+
+    if (config_.use_gpu) {
+      place_ = paddle::platform::CUDAPlace(config_.device);
+    } else {
+      place_ = paddle::platform::CPUPlace();
+    }
+    if (parent_scope) {
+      scope_ = parent_scope;
+      sub_scope_ = &(parent_scope->NewScope());
+    } else {
+      paddle::framework::InitDevices(false);
+      scope_.reset(new paddle::framework::Scope());
+    }
+
+    executor_.reset(new paddle::framework::Executor(place_));
+
+    // Initialize the inference program
+    if (!config_.model_dir.empty()) {
+      // Parameters are saved in separate files sited in
+      // the specified `dirname`.
+      inference_program_ = paddle::inference::Load(
+          executor_.get(), scope_.get(), config_.model_dir);
+    } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
+      // All parameters are saved in a single file.
+      // The file names should be consistent with that used
+      // in Python API `fluid.io.save_inference_model`.
+      inference_program_ = paddle::inference::Load(
+          executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
+    } else {
+      LOG(ERROR) << "fail to load inference model.";
+      return false;
+    }
+
+    // Analyze inference_program
+    Argument argument;
+    argument.origin_program_desc.reset(
+        new ProgramDesc(*inference_program_->Proto()));
+    Singleton<Analyzer>::Global().Run(&argument);
+    CHECK(argument.transformed_program_desc);
+    VLOG(5) << "transformed program:\n"
+            << argument.transformed_program_desc->SerializeAsString();
+    VLOG(5) << "to prepare executor";
+    *inference_program_->Proto() = *argument.transformed_program_desc;
+    ctx_ = executor_->Prepare(*inference_program_, 0);
+
+    VLOG(5) << "to create variables";
+    executor_->CreateVariables(
+        *inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0);
+
+    // Get the feed_target_names and fetch_target_names
+    feed_target_names_ = inference_program_->GetFeedTargetNames();
+    fetch_target_names_ = inference_program_->GetFetchTargetNames();
+    return true;
+  }
+
+ private:
+  TensorRTConfig config_;
+};
+
+template <>
+std::unique_ptr<PaddlePredictor>
+CreatePaddlePredictor<TensorRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
+    const TensorRTConfig& config) {
+  VLOG(3) << "create TensorRTSubgraphPredictor";
+  if (config.use_gpu) {
+    // 1. GPU memory
+    PADDLE_ENFORCE_GT(
+        config.fraction_of_gpu_memory,
+        0.f,
+        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
+    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
+    std::vector<std::string> flags;
+    if (config.fraction_of_gpu_memory >= 0.0f &&
+        config.fraction_of_gpu_memory <= 0.95f) {
+      flags.push_back("dummy");
+      std::string flag = "--fraction_of_gpu_memory_to_use=" +
+                         std::to_string(config.fraction_of_gpu_memory);
+      flags.push_back(flag);
+      VLOG(3) << "set flag: " << flag;
+      framework::InitGflags(flags);
+    }
+  }
+
+  std::unique_ptr<PaddlePredictor> predictor(
+      new TensorRTSubgraphPredictor(config));
+  if (!dynamic_cast<TensorRTSubgraphPredictor*>(predictor.get())
+           ->Init(nullptr)) {
+    return nullptr;
+  }
+  return std::move(predictor);
+}
+
+}  // namespace paddle
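The Init() above is mostly the native load path; the new piece is the analysis handoff in the middle, where the predictor serializes its program, lets the Analyzer rewrite it, and swaps the transformed desc back in. Distilled into a standalone helper, that step looks roughly like this (a sketch only; it assumes a ProgramDesc is already loaded, and uses the Argument fields exactly as above):

#include "paddle/fluid/framework/framework.pb.h"  // generated from framework.proto
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/utils/singleton.h"

namespace paddle {

// Rewrite `desc` in place, fusing TRT-capable subgraphs when the analysis
// flag (see analyzer.cc below) is enabled.
bool TransformProgram(framework::proto::ProgramDesc* desc) {
  using inference::Singleton;
  using inference::analysis::Analyzer;
  using inference::analysis::Argument;

  Argument argument;
  // The analyzer works on a copy of the original program...
  argument.origin_program_desc.reset(new framework::proto::ProgramDesc(*desc));
  Singleton<Analyzer>::Global().Run(&argument);
  // ...and hands back a transformed program for the executor to run.
  if (!argument.transformed_program_desc) return false;
  *desc = *argument.transformed_program_desc;
  return true;
}

}  // namespace paddle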
paddle/contrib/inference/test_paddle_inference_api_tensorrt_subgraph_engine.cc

Lines changed: 64 additions & 0 deletions

@@ -0,0 +1,64 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+#include "paddle/contrib/inference/paddle_inference_api.h"
+
+namespace paddle {
+
+DEFINE_string(dirname, "", "Directory of the inference model.");
+
+void Main(bool use_gpu) {
+  //# 1. Create PaddlePredictor with a config.
+  TensorRTConfig config;
+  config.model_dir = FLAGS_dirname + "word2vec.inference.model";
+  config.use_gpu = use_gpu;
+  config.fraction_of_gpu_memory = 0.15;
+  config.device = 0;
+  auto predictor =
+      CreatePaddlePredictor<TensorRTConfig,
+                            PaddleEngineKind::kAutoMixedTensorRT>(config);
+
+  for (int batch_id = 0; batch_id < 3; batch_id++) {
+    //# 2. Prepare input.
+    int64_t data[4] = {1, 2, 3, 4};
+
+    PaddleTensor tensor{.name = "",
+                        .shape = std::vector<int>({4, 1}),
+                        .data = PaddleBuf(data, sizeof(data)),
+                        .dtype = PaddleDType::INT64};
+
+    // For simplicity, we set all the slots with the same data.
+    std::vector<PaddleTensor> slots(4, tensor);
+
+    //# 3. Run
+    std::vector<PaddleTensor> outputs;
+    CHECK(predictor->Run(slots, &outputs));
+
+    //# 4. Get output.
+    ASSERT_EQ(outputs.size(), 1UL);
+    LOG(INFO) << "output buffer size: " << outputs.front().data.length();
+    const size_t num_elements = outputs.front().data.length() / sizeof(float);
+    // The outputs' buffers are in CPU memory.
+    for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
+      LOG(INFO) << static_cast<float*>(outputs.front().data.data())[i];
+    }
+  }
+}
+
+TEST(paddle_inference_api_tensorrt_subgraph_engine, main) { Main(true); }
+
+}  // namespace paddle
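Note that Main() concatenates FLAGS_dirname directly with "word2vec.inference.model", so the flag value must end with a slash. Assuming the test binary parses gflags in the usual way, an invocation might look like (path hypothetical):

./test_paddle_inference_api_tensorrt_subgraph_engine --dirname=/path/to/models/

Also, the analysis flag that gates the TRT passes (see analyzer.cc below) defaults to false, so running the test without it analyzes the program but fuses no TRT subgraphs.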

paddle/fluid/inference/analysis/CMakeLists.txt

Lines changed: 8 additions & 4 deletions

@@ -1,10 +1,12 @@
-set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init)
 cc_library(analysis SRCS pass_manager.cc dot.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc
   fluid_to_data_flow_graph_pass.cc
   data_flow_graph_to_fluid_pass.cc
-  tensorrt_subgraph_pass.cc
   dfg_graphviz_draw_pass.cc
-  DEPS framework_proto)
+  tensorrt_subgraph_pass.cc
+  tensorrt_subgraph_node_mark_pass.cc
+  analyzer.cc
+  helper.cc
+  DEPS framework_proto proto_desc)
 cc_test(test_node SRCS node_tester.cc DEPS analysis)
 cc_test(test_dot SRCS dot_tester.cc DEPS analysis)
 

@@ -28,5 +30,7 @@ inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_
 inference_analysis_test(test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc)
 inference_analysis_test(test_subgraph_splitter SRCS subgraph_splitter_tester.cc)
 inference_analysis_test(test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc)
-#inference_analysis_test(test_tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass_tester.cc)
+inference_analysis_test(test_tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass_tester.cc)
 inference_analysis_test(test_pass_manager SRCS pass_manager_tester.cc)
+inference_analysis_test(test_tensorrt_subgraph_node_mark_pass SRCS tensorrt_subgraph_node_mark_pass_tester.cc)
+inference_analysis_test(test_analyzer SRCS analyzer_tester.cc)
paddle/fluid/inference/analysis/analyzer.cc

Lines changed: 82 additions & 0 deletions

@@ -0,0 +1,82 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/analysis/analyzer.h"
+#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
+#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
+#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
+#include "paddle/fluid/inference/analysis/pass_manager.h"
+#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h"
+#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+DEFINE_bool(inference_analysis_enable_tensorrt_subgraph_engine, false,
+            "Enable subgraph to TensorRT engine for acceleration");
+
+DEFINE_string(inference_analysis_graphviz_log_root, "./",
+              "Graphviz debugger for data flow graphs.");
+
+class DfgPassManagerImpl final : public DfgPassManager {
+ public:
+  DfgPassManagerImpl() {
+    // TODO(Superjomn) set the key with pass reprs.
+    AddPass("fluid-to-data-flow-graph", new FluidToDataFlowGraphPass);
+    if (FLAGS_inference_analysis_enable_tensorrt_subgraph_engine) {
+      auto trt_teller = [](const Node* node) {
+        if (!node->IsFunction()) return false;
+        return static_cast<const Function*>(node)->func_type() == "mul";
+      };
+      AddPass("tensorrt-subgraph-marker",
+              new TensorRTSubgraphNodeMarkPass(trt_teller));
+      AddPass("tensorrt-subgraph", new TensorRTSubGraphPass(trt_teller));
+    }
+    AddPass("data-flow-graph-to-fluid", new DataFlowGraphToFluidPass);
+  }
+
+  std::string repr() const override { return "dfg-pass-manager"; }
+  std::string description() const override { return "DFG pass manager."; }
+
+ private:
+  void AddPass(const std::string& name, Pass* pass) {
+    LOG(INFO) << "Adding pass " << name;
+    Register(name, pass);
+    AddGraphvizDebugerPass(pass);
+  }
+
+  // Add the graphviz debugger pass if the parent pass has one.
+  void AddGraphvizDebugerPass(Pass* pass) {
+    auto* debuger_pass = pass->CreateGraphvizDebugerPass();
+    if (debuger_pass) {
+      LOG(INFO) << " - register debug pass [" << debuger_pass->repr() << "]";
+      Register(debuger_pass->repr(), debuger_pass);
+    }
+  }
+};
+
+Analyzer::Analyzer() { Register("manager1", new DfgPassManagerImpl); }
+
+void Analyzer::Run(Argument* argument) {
+  for (auto& x : data_) {
+    PADDLE_ENFORCE(x->Initialize(argument));
+    x->RunAll();
+    PADDLE_ENFORCE(x->Finalize());
+  }
+}
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
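Since the TRT passes are gated on a gflag that defaults to false, embedders have to flip it before the Analyzer singleton is first used (i.e., before the first predictor Init). A hedged sketch of doing so programmatically — the init.h header location is an assumption; the InitGflags call itself mirrors the predictor factory above:

#include <string>
#include <vector>

#include "paddle/fluid/framework/init.h"  // assumed location of InitGflags

namespace paddle {

// Flip the gflag before the first predictor is created, so that
// DfgPassManagerImpl registers the tensorrt-subgraph-marker and
// tensorrt-subgraph passes on construction.
void EnableTensorRTSubgraphEngine() {
  std::vector<std::string> flags;
  flags.push_back("dummy");  // stands in for argv[0], as in the factory above
  flags.push_back("--inference_analysis_enable_tensorrt_subgraph_engine=true");
  framework::InitGflags(flags);
}

}  // namespace paddle

The same effect is available from the command line of any gflags-parsing binary by passing --inference_analysis_enable_tensorrt_subgraph_engine=true.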
