Skip to content

Commit c2a00f0

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into argmin_argmax
2 parents 4b6d584 + 831909c commit c2a00f0

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

48 files changed

+667
-381
lines changed

doc/fluid/dev/api_doc_std_cn.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
# API注释撰写标准
22

3-
- [API注释模块](#API注释模块)
4-
- [格式及示例](#格式及示例)
5-
- [完整示例](#完整示例)
3+
- [API注释撰写标准](#api)
4+
- [API注释模块](#api)
5+
- [格式及示例](#)
6+
- [完整示例](#)
67

78

89
## API注释模块
@@ -217,4 +218,4 @@ API文档须使用reStructuredText格式撰写,该格式详情请参考[链接
217218
218219
## 完整示例
219220
220-
fc 的完整注释见[示例](src/fc.py)。
221+
fc 的完整注释见[示例](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/dev/src/fc.py)。

doc/fluid/dev/api_doc_std_en.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
# API Doc Standard
22

3-
- [API Doc Structure](#API Doc Structure)
4-
- [Format and Examples](#Format and Examples)
5-
- [Complete Example](#Complete Example)
3+
- [API Doc Standard](#api-doc-standard)
4+
- [API Doc Structure](#api-doc-structure)
5+
- [Format and Examples](#format-and-examples)
6+
- [Complete Example](#complete-example)
67

78

89
## API Doc Structure
@@ -223,4 +224,4 @@ Format and examples of each part of API documantation are as follows: (take fc f
223224
224225
## Complete Example
225226
226-
Complete Example of fc please see [here](src/fc.py)。
227+
Complete Example of fc please see [here](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/dev/src/fc.py)。

paddle/contrib/inference/test_paddle_inference_api_impl.cc

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,6 @@ void MainWord2Vec(bool use_gpu) {
109109

110110
void MainImageClassification(bool use_gpu) {
111111
int batch_size = 2;
112-
bool use_mkldnn = false;
113112
bool repeat = false;
114113
NativeConfig config = GetConfig();
115114
config.use_gpu = use_gpu;
@@ -134,12 +133,8 @@ void MainImageClassification(bool use_gpu) {
134133
std::vector<framework::LoDTensor*> cpu_fetchs1;
135134
cpu_fetchs1.push_back(&output1);
136135

137-
TestInference<platform::CPUPlace, false, true>(config.model_dir,
138-
cpu_feeds,
139-
cpu_fetchs1,
140-
repeat,
141-
is_combined,
142-
use_mkldnn);
136+
TestInference<platform::CPUPlace, false, true>(
137+
config.model_dir, cpu_feeds, cpu_fetchs1, repeat, is_combined);
143138

144139
auto predictor = CreatePaddlePredictor(config);
145140
std::vector<PaddleTensor> paddle_tensor_feeds;

paddle/fluid/framework/data_type.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ struct DataTypeMap {
2828
};
2929

3030
static DataTypeMap* InitDataTypeMap();
31+
// C++11 removes the need for manual locking. Concurrent execution shall wait if
32+
// a static local variable is already being initialized.
33+
// https://stackoverflow.com/questions/11711920/how-to-implement-multithread-safe-singleton-in-c11-without-using-mutex
3134
static DataTypeMap& gDataTypeMap() {
3235
static DataTypeMap* g_data_type_map_ = InitDataTypeMap();
3336
return *g_data_type_map_;

paddle/fluid/framework/details/fuse_vars_op_handle.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ void FuseVarsOpHandle::RunImpl() {
4242
out_t->ShareDataWith(out_tensor->Slice(s, s + numel));
4343
s += numel;
4444
}
45-
this->RunAndRecordEvent([this] {});
45+
this->RunAndRecordEvent([] {});
4646
}
4747

4848
std::string FuseVarsOpHandle::Name() const { return "fuse vars"; }

paddle/fluid/framework/executor.cc

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ limitations under the License. */
2424
#include "paddle/fluid/platform/profiler.h"
2525

2626
DECLARE_bool(benchmark);
27+
DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run");
2728

2829
namespace paddle {
2930
namespace framework {
@@ -115,6 +116,7 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope,
115116
void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
116117
bool create_local_scope, bool create_vars) {
117118
platform::RecordBlock b(block_id);
119+
if (FLAGS_use_mkldnn) EnableMKLDNN(pdesc);
118120
auto ctx = Prepare(pdesc, block_id);
119121
RunPreparedContext(ctx.get(), scope, create_local_scope, create_vars);
120122
}
@@ -214,6 +216,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
214216
const std::string& feed_holder_name,
215217
const std::string& fetch_holder_name) {
216218
platform::RecordBlock b(kProgramId);
219+
if (FLAGS_use_mkldnn) EnableMKLDNN(program);
217220
bool has_feed_ops =
218221
has_feed_operators(program.Block(0), *feed_targets, feed_holder_name);
219222
bool has_fetch_ops =
@@ -225,7 +228,6 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
225228
unique_ptr_of_copy_program.reset(new ProgramDesc(program));
226229
copy_program = unique_ptr_of_copy_program.get();
227230
}
228-
229231
auto* global_block = copy_program->MutableBlock(0);
230232

231233
if (!has_feed_ops) {
@@ -378,5 +380,19 @@ void Executor::RunPreparedContext(
378380
}
379381
}
380382

383+
void Executor::EnableMKLDNN(const ProgramDesc& program) {
384+
#ifdef PADDLE_WITH_MKLDNN
385+
VLOG(3) << "use_mkldnn=True";
386+
for (size_t bid = 0; bid < program.Size(); ++bid) {
387+
auto* block = const_cast<ProgramDesc&>(program).MutableBlock(bid);
388+
for (auto* op : block->AllOps()) {
389+
if (op->HasAttr("use_mkldnn")) {
390+
op->SetAttr("use_mkldnn", true);
391+
}
392+
}
393+
}
394+
#endif
395+
}
396+
381397
} // namespace framework
382398
} // namespace paddle

paddle/fluid/framework/executor.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ class Executor {
8181
const std::string& feed_holder_name = "feed",
8282
const std::string& fetch_holder_name = "fetch");
8383

84+
void EnableMKLDNN(const ProgramDesc& program);
85+
8486
private:
8587
const platform::Place place_;
8688
};

paddle/fluid/inference/tests/book/test_inference_image_classification.cc

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ DEFINE_string(fp16_dirname, "", "Directory of the float16 inference model.");
2121
DEFINE_int32(batch_size, 1, "Batch size of input data");
2222
DEFINE_int32(repeat, 1, "Running the inference program repeat times");
2323
DEFINE_bool(skip_cpu, false, "Skip the cpu test");
24-
DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference");
2524

2625
TEST(inference, image_classification) {
2726
if (FLAGS_dirname.empty() || FLAGS_batch_size < 1 || FLAGS_repeat < 1) {
@@ -59,10 +58,8 @@ TEST(inference, image_classification) {
5958
// Run inference on CPU
6059
LOG(INFO) << "--- CPU Runs: ---";
6160
LOG(INFO) << "Batch size is " << FLAGS_batch_size;
62-
LOG(INFO) << "FLAGS_use_mkldnn: " << FLAGS_use_mkldnn;
6361
TestInference<paddle::platform::CPUPlace, false, true>(
64-
dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, is_combined,
65-
FLAGS_use_mkldnn);
62+
dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, is_combined);
6663
LOG(INFO) << output1.dims();
6764
}
6865

paddle/fluid/inference/tests/book/test_inference_nlp.cc

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ limitations under the License. */
2727
DEFINE_string(model_path, "", "Directory of the inference model.");
2828
DEFINE_string(data_file, "", "File of input index data.");
2929
DEFINE_int32(repeat, 100, "Running the inference program repeat times");
30-
DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference");
3130
DEFINE_bool(prepare_vars, true, "Prepare variables before executor");
3231
DEFINE_int32(num_threads, 1, "Number of threads should be used");
3332

@@ -190,9 +189,6 @@ TEST(inference, nlp) {
190189
std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
191190
inference_program = InitProgram(&executor, scope.get(), FLAGS_model_path,
192191
/*model combined*/ false);
193-
if (FLAGS_use_mkldnn) {
194-
EnableMKLDNN(inference_program);
195-
}
196192
// always prepare context
197193
std::unique_ptr<paddle::framework::ExecutorPrepareContext> ctx;
198194
ctx = executor.Prepare(*inference_program, 0);

paddle/fluid/inference/tests/test_helper.h

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ limitations under the License. */
2222
#include "paddle/fluid/inference/io.h"
2323
#include "paddle/fluid/platform/profiler.h"
2424

25+
DECLARE_bool(use_mkldnn);
26+
2527
template <typename T>
2628
void SetupTensor(paddle::framework::LoDTensor* input,
2729
paddle::framework::DDim dims, T lower, T upper) {
@@ -133,24 +135,11 @@ std::vector<std::vector<int64_t>> GetFeedTargetShapes(
133135
return feed_target_shapes;
134136
}
135137

136-
void EnableMKLDNN(
137-
const std::unique_ptr<paddle::framework::ProgramDesc>& program) {
138-
for (size_t bid = 0; bid < program->Size(); ++bid) {
139-
auto* block = program->MutableBlock(bid);
140-
for (auto* op : block->AllOps()) {
141-
if (op->HasAttr("use_mkldnn")) {
142-
op->SetAttr("use_mkldnn", true);
143-
}
144-
}
145-
}
146-
}
147-
148138
template <typename Place, bool CreateVars = true, bool PrepareContext = false>
149139
void TestInference(const std::string& dirname,
150140
const std::vector<paddle::framework::LoDTensor*>& cpu_feeds,
151141
const std::vector<paddle::framework::LoDTensor*>& cpu_fetchs,
152-
const int repeat = 1, const bool is_combined = false,
153-
const bool use_mkldnn = false) {
142+
const int repeat = 1, const bool is_combined = false) {
154143
// 1. Define place, executor, scope
155144
auto place = Place();
156145
auto executor = paddle::framework::Executor(place);
@@ -182,9 +171,6 @@ void TestInference(const std::string& dirname,
182171
"init_program",
183172
paddle::platform::DeviceContextPool::Instance().Get(place));
184173
inference_program = InitProgram(&executor, scope, dirname, is_combined);
185-
if (use_mkldnn) {
186-
EnableMKLDNN(inference_program);
187-
}
188174
}
189175
// Disable the profiler and print the timing information
190176
paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kDefault,
@@ -210,7 +196,10 @@ void TestInference(const std::string& dirname,
210196
fetch_targets[fetch_target_names[i]] = cpu_fetchs[i];
211197
}
212198

213-
// 6. Run the inference program
199+
// 6. If export Flags_use_mkldnn=True, use mkldnn related ops.
200+
if (FLAGS_use_mkldnn) executor.EnableMKLDNN(*inference_program);
201+
202+
// 7. Run the inference program
214203
{
215204
if (!CreateVars) {
216205
// If users don't want to create and destroy variables every time they

0 commit comments

Comments (0)