PaddlePaddle
diff --git a/‎paddle/fluid/API.spec
Lines changed: 2 additions & 2 deletions b/‎paddle/fluid/API.spec
Lines changed: 2 additions & 2 deletions
diff --git a/‎paddle/fluid/inference/analysis/subgraph_splitter.cc
Lines changed: 8 additions & 1 deletion b/‎paddle/fluid/inference/analysis/subgraph_splitter.cc
Lines changed: 8 additions & 1 deletion
diff --git a/‎paddle/fluid/inference/api/analysis_predictor.cc
Lines changed: 3 additions & 0 deletions b/‎paddle/fluid/inference/api/analysis_predictor.cc
Lines changed: 3 additions & 0 deletions
diff --git a/‎paddle/fluid/inference/api/api.cc
Lines changed: 9 additions & 7 deletions b/‎paddle/fluid/inference/api/api.cc
Lines changed: 9 additions & 7 deletions
diff --git a/‎paddle/fluid/inference/api/api_impl.cc
Lines changed: 3 additions & 0 deletions b/‎paddle/fluid/inference/api/api_impl.cc
Lines changed: 3 additions & 0 deletions
diff --git a/‎paddle/fluid/inference/api/paddle_inference_api.h
Lines changed: 3 additions & 1 deletion b/‎paddle/fluid/inference/api/paddle_inference_api.h
Lines changed: 3 additions & 1 deletion
diff --git a/‎paddle/fluid/inference/tests/api/CMakeLists.txt
Lines changed: 17 additions & 1 deletion b/‎paddle/fluid/inference/tests/api/CMakeLists.txt
Lines changed: 17 additions & 1 deletion
diff --git a/‎paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
Lines changed: 3 additions & 4 deletions b/‎paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
Lines changed: 3 additions & 4 deletions
diff --git a/‎paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
Lines changed: 133 additions & 0 deletions b/‎paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
Lines changed: 133 additions & 0 deletions
diff --git a/‎paddle/fluid/inference/tests/api/tester_helper.h
Lines changed: 27 additions & 12 deletions b/‎paddle/fluid/inference/tests/api/tester_helper.h
Lines changed: 27 additions & 12 deletions
@@ -305,9 +305,9 @@ paddle.fluid.layers.target_assign ArgSpec(args=['input', 'matched_indices', 'neg
 paddle.fluid.layers.detection_output ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0))
 paddle.fluid.layers.ssd_loss ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None))
 paddle.fluid.layers.detection_map ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral'))
-paddle.fluid.layers.rpn_target_assign ArgSpec(args=['loc', 'scores', 'anchor_box', 'anchor_var', 'gt_box', 'rpn_batch_size_per_im', 'fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap'], varargs=None, keywords=None, defaults=(256, 0.25, 0.7, 0.3))
+paddle.fluid.layers.rpn_target_assign ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True))
 paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None))
-paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'gt_boxes', 'im_scales', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None))
+paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True))
 paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None))
 paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 
@@ -120,13 +120,20 @@ void UnionContractedNodes(const std::unordered_map<int, BriefNode *> &node_map,
     outputs.insert(node);
   }
 
-  // update the dst and src node's inlinks and outlinks.
+// update the dst and src node's inlinks and outlinks.
+#ifdef __clang__
+  src_node->inlinks = std::vector<BriefNode *>(inputs.begin(), inputs.end());
+  src_node->outlinks = std::vector<BriefNode *>(outputs.begin(), outputs.end());
+  dst_node->inlinks.clear();
+  dst_node->outlinks.clear();
+#else
   src_node->inlinks =
       std::move(std::vector<BriefNode *>(inputs.begin(), inputs.end()));
   src_node->outlinks =
       std::move(std::vector<BriefNode *>(outputs.begin(), outputs.end()));
   dst_node->inlinks.clear();
   dst_node->outlinks.clear();
+#endif
 
   auto inlink_or_outlink_cleaner = [&](std::vector<BriefNode *> &nodes) {
     for (auto *&n : nodes) {
 
@@ -77,6 +77,9 @@ bool AnalysisPredictor::Init(
 
   OptimizeInferenceProgram();
   ctx_ = executor_->Prepare(*inference_program_, 0);
+  if (config_._use_mkldnn) {
+    executor_->EnableMKLDNN(*inference_program_);
+  }
 
   VLOG(5) << "to create variables";
   PADDLE_ENFORCE(scope_.get());
 
@@ -9,8 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <glog/logging.h>
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/platform/enforce.h"
 
 namespace paddle {
 
@@ -64,13 +64,15 @@ PaddleBuf& PaddleBuf::operator=(PaddleBuf&& other) {
 
 void PaddleBuf::Resize(size_t length) {
   // Only owned memory can be reallocated; externally provided memory cannot be changed.
-  if (length_ == length) return;
+  if (length_ >= length) return;  // already large enough: buffer and length_ are left untouched
   if (memory_owned_) {
     Free();  // releases the old buffer; its contents are not copied into the new one
+    data_ = malloc(length);  // NOTE(review): result is not checked for nullptr; the owning constructor in this patch still allocates with new char[] while Free() calls free() - confirm the allocators are paired
+    length_ = length;
+    memory_owned_ = true;
+  } else {  // memory that this buffer does not own is never reallocated
+    PADDLE_THROW("The memory is allocated externally, can not Resized");
   }
-  data_ = new char[length];
-  length_ = length;
-  memory_owned_ = true;
 }
 
 void PaddleBuf::Reset(void* data, size_t length) {
@@ -82,8 +84,8 @@ void PaddleBuf::Reset(void* data, size_t length) {
 
 void PaddleBuf::Free() {
   if (memory_owned_ && data_) {
-    assert(length_ > 0);
-    delete[] static_cast<char*>(data_);
+    PADDLE_ENFORCE_GT(length_, 0);
+    free(static_cast<char*>(data_));
     data_ = nullptr;
     length_ = 0;
   }
 
@@ -106,6 +106,9 @@ bool NativePaddlePredictor::Init(
   }
 
   ctx_ = executor_->Prepare(*inference_program_, 0);
+  if (config_._use_mkldnn) {
+    executor_->EnableMKLDNN(*inference_program_);
+  }
   executor_->CreateVariables(*inference_program_,
                              sub_scope_ ? sub_scope_ : scope_.get(), 0);
 
 
@@ -45,7 +45,7 @@ class PaddleBuf {
   PaddleBuf(void* data, size_t length)
       : data_(data), length_(length), memory_owned_{false} {}
   // Own memory.
-  PaddleBuf(size_t length)
+  explicit PaddleBuf(size_t length)
       : data_(new char[length]), length_(length), memory_owned_(true) {}
   // Resize to `length` bytes.
   void Resize(size_t length);
@@ -121,6 +121,8 @@ struct NativeConfig : public PaddlePredictor::Config {
   bool use_gpu{false};
   int device{0};
   float fraction_of_gpu_memory{-1.f};  // Negative to notify initialization.
+  // NOTE: Do NOT use this; it exists only for internal tests and will be removed later.
+  bool _use_mkldnn{false};
   // Specify the variable's name of each input.
   bool specify_input_name{false};
 
 
@@ -53,5 +53,21 @@ set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classifi
 download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} "text-classification-Senta.tar.gz" "text_classification_data.txt.tar.gz")
 inference_analysis_test(test_analyzer_text_classification SRCS analyzer_text_classification_tester.cc
     EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
-    ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/text-classification-Senta
+    ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/model
          --infer_data=${TEXT_CLASSIFICATION_INSTALL_DIR}/data.txt)
+
+# OCR demo: fetch the model archive if it is not already present, then register the analyzer test.
+set(OCR_MODEL_URL "http://paddlemodels.cdn.bcebos.com/inference-vis-demos%2Focr.tar.gz")
+set(OCR_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/ocr")
+if (NOT EXISTS ${OCR_INSTALL_DIR} AND WITH_INFERENCE)
+    get_filename_component(filename ${OCR_MODEL_URL} NAME)
+    message(STATUS "Download inference test stuff ${filename} from ${OCR_MODEL_URL}")
+    execute_process(COMMAND bash -c "mkdir -p ${OCR_INSTALL_DIR}")
+    execute_process(COMMAND bash -c "cd ${OCR_INSTALL_DIR} && wget -q ${OCR_MODEL_URL}")
+    execute_process(COMMAND bash -c "cd ${OCR_INSTALL_DIR} && tar xzf ${filename}")
+    message(STATUS "finish downloading ${filename}")
+endif()
+inference_analysis_test(test_analyzer_ocr SRCS analyzer_vis_tester.cc
+    EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
+    ARGS --infer_model=${OCR_INSTALL_DIR}/model
+        --infer_data=${OCR_INSTALL_DIR}/data.txt)
@@ -110,8 +110,7 @@ const int64_t lac_ref_data[] = {24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25,
 
 void TestLACPrediction(const std::string &model_path,
                        const std::string &data_file, const int batch_size,
-                       const int repeat, bool test_all_data,
-                       bool use_analysis = false) {
+                       const int repeat, bool use_analysis = false) {
   AnalysisConfig cfg;
   cfg.model_dir = model_path;
   cfg.use_gpu = false;
@@ -199,13 +198,13 @@ void TestLACPrediction(const std::string &model_path,
 TEST(Analyzer_LAC, native) {
   LOG(INFO) << "LAC with native";
   TestLACPrediction(FLAGS_infer_model, FLAGS_infer_data, FLAGS_batch_size,
-                    FLAGS_repeat, FLAGS_test_all_data);
+                    FLAGS_repeat);
 }
 
 TEST(Analyzer_LAC, analysis) {
   LOG(INFO) << "LAC with analysis";
   TestLACPrediction(FLAGS_infer_model, FLAGS_infer_data, FLAGS_batch_size,
-                    FLAGS_repeat, FLAGS_test_all_data, true);
+                    FLAGS_repeat, true);
 }
 
 }  // namespace analysis
 
@@ -0,0 +1,133 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <fstream>
+#include <iostream>
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+struct Record {  // One parsed input sample: flat float values plus the shape used for the input tensor.
+  std::vector<float> data;     // values parsed from the first tab-separated column
+  std::vector<int32_t> shape;  // dimensions parsed from the second tab-separated column
+};
+
+Record ProcessALine(const std::string &line) {  // Parse one "<data>\t<shape>" text line into a Record.
+  VLOG(3) << "process a line";
+  std::vector<std::string> columns;
+  split(line, '\t', &columns);  // split() is defined outside this file; presumably tokenizes on the delimiter - confirm
+  CHECK_EQ(columns.size(), 2UL)  // exactly two tab-separated columns are required
+      << "data format error, should be <data>\t<shape>";
+
+  Record record;
+  std::vector<std::string> data_strs;
+  split(columns[0], ' ', &data_strs);  // first column: space-separated float values
+  for (auto &d : data_strs) {
+    record.data.push_back(std::stof(d));  // std::stof throws if the token is not a number
+  }
+
+  std::vector<std::string> shape_strs;
+  split(columns[1], ' ', &shape_strs);  // second column: space-separated integer dimensions
+  for (auto &s : shape_strs) {
+    record.shape.push_back(std::stoi(s));  // std::stoi throws if the token is not a number
+  }
+  VLOG(3) << "data size " << record.data.size();
+  VLOG(3) << "data shape size " << record.shape.size();
+  return record;
+}
+
+/*
+ * Run the visual demo model (ocr, mobilenet and se_resnext50 per the original note) with the analysis engine, timed over FLAGS_repeat runs,
+ * then run the native engine once as the reference, compare both outputs, and log the fuse statistics and op count.
+ */
+void TestVisualPrediction(bool use_mkldnn) {
+  std::unique_ptr<PaddlePredictor> predictor;
+  AnalysisConfig cfg;
+  cfg.param_file = FLAGS_infer_model + "/__params__";
+  cfg.prog_file = FLAGS_infer_model + "/__model__";
+  cfg.use_gpu = false;
+  cfg._use_mkldnn = use_mkldnn;  // read by the predictors' Init(), which call EnableMKLDNN when it is set
+  cfg.device = 0;
+  cfg.enable_ir_optim = true;
+  // TODO(TJ): fix fusion gru
+  cfg.ir_passes.push_back("fc_gru_fuse_pass");  // NOTE(review): presumably ir_passes acts as an exclusion list here - confirm
+#ifdef PADDLE_WITH_MKLDNN
+  // disable the mkldnn fuse pass since it appears to have some bugs
+  cfg.ir_passes.push_back("conv_relu_mkldnn_fuse_pass");
+#endif
+  predictor =
+      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(cfg);
+
+  // Only the first line of the data file (a single batch) is read.
+  std::string line;
+  std::ifstream file(FLAGS_infer_data);
+  std::getline(file, line);  // NOTE(review): the stream is not checked; an unreadable file leaves `line` empty
+  auto record = ProcessALine(line);
+  file.close();
+
+  // Inference.
+  PaddleTensor input;
+  input.shape = record.shape;
+  input.data =
+      PaddleBuf(record.data.data(), record.data.size() * sizeof(float));  // PaddleBuf(void*, size_t) marks the memory as not owned, so `record` must stay alive while `input` is used
+  input.dtype = PaddleDType::FLOAT32;
+
+  std::vector<PaddleTensor> outputs_slots;
+  Timer timer;
+  timer.tic();
+  for (int i = 0; i < FLAGS_repeat; i++) {
+    predictor->Run({input}, &outputs_slots);  // same input every iteration; the return value of Run is ignored
+  }
+  PrintTime(/*batch size*/ 1, FLAGS_repeat, /*num threads*/ 1, /*thread id*/ 0,
+            timer.toc() / FLAGS_repeat);  // average time per run
+
+  VLOG(3) << "output.size " << outputs_slots.size();
+
+  // run native as reference
+  auto ref_predictor =
+      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);  // NOTE(review): cfg is an AnalysisConfig; presumably it derives from NativeConfig - confirm
+  std::vector<PaddleTensor> ref_outputs_slots;
+  ref_predictor->Run({input}, &ref_outputs_slots);
+  CompareResult(outputs_slots, ref_outputs_slots);
+  // print what are fused
+  AnalysisPredictor *analysis_predictor =
+      dynamic_cast<AnalysisPredictor *>(predictor.get());  // NOTE(review): dereferenced below without a nullptr check
+  auto &fuse_statis = analysis_predictor->analysis_argument()
+                          .Get<std::unordered_map<std::string, int>>(
+                              framework::ir::kFuseStatisAttr);
+  for (auto &item : fuse_statis) {
+    LOG(INFO) << "fused " << item.first << " " << item.second;
+  }
+  int num_ops = 0;
+  for (auto &node :
+       analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) {
+    if (node->IsFunction()) {  // only nodes reporting IsFunction() are counted as ops
+      ++num_ops;
+    }
+  }
+  LOG(INFO) << "has num ops: " << num_ops;
+}
+
+TEST(Analyzer_vis, analysis) { TestVisualPrediction(/*use_mkldnn*/ false); }  // always registered
+#ifdef PADDLE_WITH_MKLDNN  // the MKLDNN variant is only compiled when the macro is defined
+TEST(Analyzer_vis, analysis_mkldnn) {
+  TestVisualPrediction(/*use_mkldnn*/ true);
+}
+#endif
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
@@ -37,22 +37,37 @@ namespace paddle {
 namespace inference {
 
 void CompareResult(const std::vector<PaddleTensor> &outputs,
-                   const std::vector<PaddleTensor> &base_outputs) {
-  PADDLE_ENFORCE_GT(outputs.size(), 0);
-  PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
+                   const std::vector<PaddleTensor> &ref_outputs) {  // Compare each output tensor with the reference tensor at the same index.
+  EXPECT_GT(outputs.size(), 0);
+  EXPECT_EQ(outputs.size(), ref_outputs.size());  // NOTE(review): EXPECT_* records a failure without aborting, so a shorter ref_outputs is still indexed below
   for (size_t i = 0; i < outputs.size(); i++) {
     auto &out = outputs[i];
-    auto &base_out = base_outputs[i];
+    auto &ref_out = ref_outputs[i];
     size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,  // element count = product of the dims, accumulated as int
                                   [](int a, int b) { return a * b; });
-    size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
-                                   1, [](int a, int b) { return a * b; });
-    PADDLE_ENFORCE_EQ(size, size1);
-    PADDLE_ENFORCE_GT(size, 0);
-    float *data = static_cast<float *>(out.data.data());
-    float *base_data = static_cast<float *>(base_out.data.data());
-    for (size_t i = 0; i < size; i++) {
-      EXPECT_NEAR(data[i], base_data[i], 1e-3);
+    size_t ref_size =
+        std::accumulate(ref_out.shape.begin(), ref_out.shape.end(), 1,
+                        [](int a, int b) { return a * b; });
+    EXPECT_GT(size, 0);
+    EXPECT_EQ(size, ref_size);
+    EXPECT_EQ(out.dtype, ref_out.dtype);
+    switch (out.dtype) {  // only the INT64 and FLOAT32 cases below compare payloads; there is no default branch
+      case PaddleDType::INT64: {
+        int64_t *pdata = static_cast<int64_t *>(out.data.data());
+        int64_t *pdata_ref = static_cast<int64_t *>(ref_out.data.data());
+        for (size_t j = 0; j < size; ++j) {
+          EXPECT_EQ(pdata_ref[j], pdata[j]);  // integer outputs must match exactly
+        }
+        break;
+      }
+      case PaddleDType::FLOAT32: {
+        float *pdata = static_cast<float *>(out.data.data());
+        float *pdata_ref = static_cast<float *>(ref_out.data.data());
+        for (size_t j = 0; j < size; ++j) {
+          EXPECT_NEAR(pdata_ref[j], pdata[j], 1e-3);  // float outputs may differ by at most 1e-3
+        }
+        break;
+      }
     }
   }
 }
Original file line number	Diff line number	Diff line change
`@@ -106,6 +106,9 @@ bool NativePaddlePredictor::Init(`
`106`	`106`	`}`
`107`	`107`
`108`	`108`	`ctx_ = executor_->Prepare(*inference_program_, 0);`
	`109`	`+ if (config_._use_mkldnn) {`
	`110`	`+ executor_->EnableMKLDNN(*inference_program_);`
	`111`	`+ }`
`109`	`112`	`executor_->CreateVariables(*inference_program_,`
`110`	`113`	`sub_scope_ ? sub_scope_ : scope_.get(), 0);`
`111`	`114`