Skip to content

Commit 2888d2d

Browse files
committed
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into parallel_bcast
2 parents 4778c6e + e69d9c8 commit 2888d2d

25 files changed

+398
-86
lines changed

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ Our vision is to enable deep learning for everyone via PaddlePaddle.
1919
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle.
2020

2121

22-
### Latest PaddlePaddle Release: [Fluid 0.14.0](https://github.com/PaddlePaddle/Paddle/tree/v0.14.0)
22+
### Latest PaddlePaddle Release: [Fluid 0.15.0](https://github.com/PaddlePaddle/Paddle/tree/v0.15.0)
2323
### Install Latest Stable Release:
2424
```
2525
# Linux CPU
@@ -76,26 +76,26 @@ pip install paddlepaddle-gpu==0.14.0.post85
7676

7777
## Installation
7878

79-
It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/0.14.0/new_docs/beginners_guide/install/install_doc.html) on our website.
79+
It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/beginners_guide/install/install_doc.html) on our website.
8080

8181
## Documentation
8282

83-
We provide [English](http://paddlepaddle.org/documentation/docs/en/0.14.0/getstarted/index_en.html) and
84-
[Chinese](http://paddlepaddle.org/documentation/docs/zh/0.14.0/new_docs/beginners_guide/index.html) documentation.
83+
We provide [English](http://paddlepaddle.org/documentation/docs/en/0.15.0/getstarted/index_en.html) and
84+
[Chinese](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/beginners_guide/index.html) documentation.
8585

8686
- [Deep Learning 101](https://github.com/PaddlePaddle/book)
8787

8888
You might want to start from this online interactive book that can run in a Jupyter Notebook.
8989

90-
- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/0.14.0/new_docs/user_guides/howto/training/cluster_howto.html)
90+
- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/user_guides/howto/training/cluster_howto.html)
9191

9292
You can run distributed training jobs on MPI clusters.
9393

94-
- [Python API](http://paddlepaddle.org/documentation/api/zh/0.14.0/fluid.html)
94+
- [Python API](http://paddlepaddle.org/documentation/api/zh/0.15.0/fluid.html)
9595

9696
Our new API enables much shorter programs.
9797

98-
- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/0.14.0/new_docs/advanced_usage/development/contribute_to_paddle.html)
98+
- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/advanced_usage/development/contribute_to_paddle.html)
9999

100100
We appreciate your contributions!
101101

paddle/fluid/framework/ir/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph grap
2828
pass_library(graph_to_program_pass base)
2929
pass_library(graph_viz_pass base)
3030
pass_library(fc_fuse_pass inference)
31+
if(WITH_MKLDNN)
32+
pass_library(conv_relu_mkldnn_fuse_pass inference)
33+
endif()
3134
pass_library(attention_lstm_fuse_pass inference)
3235
pass_library(infer_clean_graph_pass inference)
3336
pass_library(fc_lstm_fuse_pass inference)
@@ -42,3 +45,6 @@ cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_r
4245
cc_test(graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass)
4346
cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector)
4447
cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto)
48+
if(WITH_MKLDNN)
49+
cc_test(test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
50+
endif()
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.h"
#include <string>
#include <vector>
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace framework {
namespace ir {

// Scans `graph` for conv2d -> relu chains and collapses each one into a
// single conv2d op carrying fuse_relu=true, so the MKL-DNN conv kernel
// applies the activation in place. Returns the mutated graph.
std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  PADDLE_ENFORCE(graph.get());
  FusePassBase::Init("conv_relu_mkldnn_fuse", graph.get());

  GraphPatternDetector gpd;
  auto* conv_input = gpd.mutable_pattern()
                         ->NewNode("conv_relu_mkldnn_fuse/conv_input")
                         ->AsInput()
                         ->assert_is_op_input("conv2d", "Input");
  patterns::ConvReLU conv_relu_pattern(gpd.mutable_pattern(),
                                       "conv_relu_mkldnn_fuse");
  conv_relu_pattern(conv_input);

  int found_conv_relu_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "handle ConvReLU fuse";
    // Validate the match before any subgraph.at() access (the original
    // code only checked after the first dereference).
    PADDLE_ENFORCE(subgraph.count(conv_input));
    GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
                              conv_relu_pattern);  // Filter
    GET_IR_NODE_FROM_SUBGRAPH(conv_bias, conv_bias, conv_relu_pattern);  // Bias
    GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern);    // tmp
    GET_IR_NODE_FROM_SUBGRAPH(conv, conv, conv_relu_pattern);        // CONV op
    GET_IR_NODE_FROM_SUBGRAPH(relu_out, relu_out, conv_relu_pattern);    // Out
    GET_IR_NODE_FROM_SUBGRAPH(relu, relu, conv_relu_pattern);        // ReLU op

    // Build the fused op: same inputs as the matched conv2d, output wired
    // straight to the relu output, skipping the intermediate tensor.
    OpDesc desc;
    std::string conv_relu_i_in = subgraph.at(conv_input)->Name();
    std::string conv_relu_w_in = conv_weight->Name();
    std::string conv_relu_b_in = conv_bias->Name();
    std::string conv_relu_out = relu_out->Name();
    desc.SetInput("Input", std::vector<std::string>({conv_relu_i_in}));
    desc.SetInput("Filter", std::vector<std::string>({conv_relu_w_in}));
    desc.SetInput("Bias", std::vector<std::string>({conv_relu_b_in}));
    desc.SetOutput("Out", std::vector<std::string>({conv_relu_out}));
    desc.SetType("conv2d");
    // Preserve every attribute of the original conv2d (strides, paddings,
    // use_mkldnn, ...) and then flag the fused activation.
    for (auto& attr : conv->Op()->GetAttrMap()) {
      desc.SetAttr(attr.first, attr.second);
    }
    desc.SetAttr("fuse_relu", true);
    auto conv_relu_node = g->CreateOpNode(&desc);  // OpDesc will be copied.
    // conv, relu and the intermediate conv_out tensor are now dead.
    GraphSafeRemoveNodes(graph.get(), {conv, relu, conv_out});

    IR_NODE_LINK_TO(subgraph.at(conv_input), conv_relu_node);
    IR_NODE_LINK_TO(conv_weight, conv_relu_node);
    IR_NODE_LINK_TO(conv_bias, conv_relu_node);
    IR_NODE_LINK_TO(conv_relu_node, relu_out);

    found_conv_relu_count++;
  };

  gpd(graph.get(), handler);

  AddStatis(found_conv_relu_count);
  return graph;
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(conv_relu_mkldnn_fuse_pass,
              paddle::framework::ir::ConvReLUFusePass);
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once

#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/pass.h"

namespace paddle {
namespace framework {
namespace ir {

/*
 * Fuse the CONV and ReLU to a ConvReLUOp.
 *
 * Replaces every conv2d -> relu chain with a single conv2d carrying the
 * fuse_relu attribute, which the MKL-DNN kernel honors.
 */
class ConvReLUFusePass : public FusePassBase {
 public:
  virtual ~ConvReLUFusePass() {}

 protected:
  // Marked `override` so the compiler verifies the signature matches the
  // virtual declared in the Pass hierarchy.
  std::unique_ptr<ir::Graph> ApplyImpl(
      std::unique_ptr<ir::Graph> graph) const override;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.h"

#include <gtest/gtest.h>

namespace paddle {
namespace framework {
namespace ir {

// Appends one op named `type` to block 0 of `prog`, wiring its inputs the
// way that op expects: conv2d takes (Input, Filter, Bias); relu takes X.
void SetOp(ProgramDesc* prog, const std::string& type,
           const std::vector<std::string>& inputs,
           const std::vector<std::string>& outputs) {
  auto* op = prog->MutableBlock(0)->AppendOp();
  op->SetType(type);
  if (type == "conv2d") {
    op->SetAttr("use_mkldnn", true);
    op->SetInput("Input", {inputs[0]});
    op->SetInput("Filter", {inputs[1]});
    op->SetInput("Bias", {inputs[2]});
  } else if (type == "relu") {
    op->SetInput("X", inputs);
  }
  op->SetOutput("Out", outputs);
}

// Builds a toy program:
//   a->OP0->b
//   b->OP1->c
//   (c, weights, bias)->conv->f
//   (f)->relu->g
// Only `weights` and `bias` are persistable, matching what the pattern
// asserts for the conv2d Filter/Bias inputs.
ProgramDesc BuildProgramDesc() {
  ProgramDesc prog;
  const std::vector<std::string> names = {"a",    "b", "c", "weights",
                                          "bias", "f", "g"};
  for (const auto& name : names) {
    auto* var = prog.MutableBlock(0)->Var(name);
    var->SetType(proto::VarType::SELECTED_ROWS);
    if (name == "weights" || name == "bias") {
      var->SetPersistable(true);
    }
  }

  SetOp(&prog, "OP0", {"a"}, {"b"});
  SetOp(&prog, "OP1", {"b"}, {"c"});
  SetOp(&prog, "conv2d", {"c", "weights", "bias"}, {"f"});
  SetOp(&prog, "relu", {"f"}, {"g"});

  return prog;
}

TEST(ConvReLUFusePass, basic) {
  auto prog = BuildProgramDesc();

  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));

  auto pass = PassRegistry::Instance().Get("conv_relu_mkldnn_fuse_pass");

  int original_nodes_num = graph->Nodes().size();

  graph = pass->Apply(std::move(graph));

  int current_nodes_num = graph->Nodes().size();

  // The fuse removes three nodes (conv, relu, conv_out) and creates one
  // (the fused conv2d) — a net loss of exactly two nodes.
  EXPECT_EQ(original_nodes_num - 2, current_nodes_num);

  // Exactly one conv2d op should now carry use_mkldnn && fuse_relu.
  int conv_relu_count = 0;
  for (auto* node : graph->Nodes()) {
    if (!node->IsOp() || node->Op()->Type() != "conv2d") {
      continue;
    }
    auto* op = node->Op();
    const bool use_mkldnn = op->HasAttr("use_mkldnn") &&
                            boost::get<bool>(op->GetAttr("use_mkldnn"));
    const bool fuse_relu = op->HasAttr("fuse_relu") &&
                           boost::get<bool>(op->GetAttr("fuse_relu"));
    if (use_mkldnn && fuse_relu) {
      ++conv_relu_count;
    }
  }
  EXPECT_EQ(conv_relu_count, 1);
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

USE_PASS(conv_relu_mkldnn_fuse_pass);

paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
5151
if (with_fc_bias) {
5252
// Add FC-bias with LSTM-bias and create a new weight
5353
PADDLE_ENFORCE(scope);
54-
const std::string& new_bias_var = name_scope + "_bias.new";
54+
const std::string& new_bias_var = patterns::UniqueKey("NewBias");
5555
auto* bias_var = scope->Var(new_bias_var);
5656
PADDLE_ENFORCE(bias_var);
5757
auto* bias_tensor = bias_var->GetMutable<framework::LoDTensor>();
@@ -120,7 +120,6 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
120120

121121
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
122122
Graph* g) {
123-
124123
GET_IR_NODE_FROM_SUBGRAPH(lstm, lstm, lstm_pattern);
125124
GET_IR_NODE_FROM_SUBGRAPH(Weight, Weight, lstm_pattern);
126125
GET_IR_NODE_FROM_SUBGRAPH(Bias, Bias, lstm_pattern);
@@ -136,7 +135,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
136135
fc_bias);
137136
// Remove unneeded nodes.
138137
std::unordered_set<const Node*> marked_nodes(
139-
{mul, lstm, elementwise_add});
138+
{mul, lstm, elementwise_add, fc_bias});
140139
GraphSafeRemoveNodes(graph, marked_nodes);
141140
} else {
142141
GET_IR_NODE_FROM_SUBGRAPH(fc_out, mul_out, fc_pattern);

paddle/fluid/framework/ir/graph_pattern_detector.cc

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,39 @@ bool VarLinksFromOp(Node* node, const std::string& op_type) {
522522
return false;
523523
}
524524

525+
PDNode* patterns::ConvReLU::operator()(
    paddle::framework::ir::PDNode* conv_input) {
  // Operator nodes of the pattern.
  conv_input->assert_is_op_input("conv2d", "Input");
  auto* conv_op = pattern->NewNode(conv_repr())->assert_is_op("conv2d");
  auto* relu_op = pattern->NewNode(relu_repr())->assert_is_op("relu");

  // Persistable conv2d inputs: Filter and Bias.
  auto* filter_var = pattern->NewNode(conv_weight_repr())
                         ->AsInput()
                         ->assert_is_persistable_var()
                         ->assert_is_op_input("conv2d", "Filter");
  auto* bias_var = pattern->NewNode(conv_bias_repr())
                       ->AsInput()
                       ->assert_is_persistable_var()
                       ->assert_is_op_input("conv2d", "Bias");

  // conv2d output feeding relu; removed from the IR once the fuse happens.
  auto* conv_out_var = pattern->NewNode(conv_out_repr())
                           ->AsIntermediate()
                           ->assert_is_only_output_of_op("conv2d")
                           ->assert_is_op_input("relu");

  // Final output of the whole chain.
  auto* relu_out_var = pattern->NewNode(relu_out_repr())
                           ->AsOutput()
                           ->assert_is_op_output("relu");

  conv_op->LinksFrom({conv_input, filter_var, bias_var})
      .LinksTo({conv_out_var});
  relu_op->LinksFrom({conv_out_var}).LinksTo({relu_out_var});
  return relu_out_var;
}
557+
525558
PDNode* patterns::FC::operator()(paddle::framework::ir::PDNode* x,
526559
bool with_bias) {
527560
// Create shared nodes.

paddle/fluid/framework/ir/graph_pattern_detector.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,28 @@ struct PatternBase {
360360
size_t id_;
361361
};
362362

363+
// CONV with ReLU
364+
// op: conv + relu
365+
// named nodes:
366+
// conv_input, conv_weight,
367+
// conv_bias, conv_out, conv,
368+
// relu_out, relu
369+
struct ConvReLU : public PatternBase {
370+
ConvReLU(PDPattern* pattern, const std::string& name_scope)
371+
: PatternBase(pattern, name_scope, "conv_relu") {}
372+
373+
PDNode* operator()(PDNode* conv_input);
374+
375+
// declare operator node's name
376+
PATTERN_DECL_NODE(conv);
377+
PATTERN_DECL_NODE(relu);
378+
// declare variable node's name
379+
PATTERN_DECL_NODE(conv_weight);
380+
PATTERN_DECL_NODE(conv_bias);
381+
PATTERN_DECL_NODE(conv_out);
382+
PATTERN_DECL_NODE(relu_out);
383+
};
384+
363385
// FC with bias
364386
// op: mul + elementwise_add
365387
// named nodes:

0 commit comments

Comments
 (0)