cherry-pick MKL-DNN NHWC FWD support fix (#21593)

bingyanghuang · luotao1 · commit 1f598dfa18f2 · 2019-12-06T15:47:13.000+08:00
diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc
@@ -79,8 +79,9 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const {
       x_dims, x_dims.size());
 
   const int64_t C =
-      (data_layout == DataLayout::kNCHW ? x_dims[1]
-                                        : x_dims[x_dims.size() - 1]);
+      ((this->IsMKLDNNType() == true) || (data_layout == DataLayout::kNCHW)
+           ? x_dims[1]
+           : x_dims[x_dims.size() - 1]);
 
   auto scale_dim = ctx->GetInputDim("Scale");
   auto bias_dim = ctx->GetInputDim("Bias");
@@ -154,6 +155,32 @@ framework::OpKernelType BatchNormOp::GetExpectedKernelType(
                                  library);
 }
 
+framework::OpKernelType BatchNormOp::GetKernelTypeForVar(
+    const std::string &var_name, const Tensor &tensor,
+    const framework::OpKernelType &expected_kernel_type) const {
+#ifdef PADDLE_WITH_MKLDNN
+  // Only input require reshaping, weights and
+  // bias are having shape in NCHW order
+  if ((var_name == "X") &&
+      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
+      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
+    auto attrs = Attrs();
+    auto ar = paddle::framework::AttrReader(attrs);
+    const std::string data_layout = ar.Get<std::string>("data_layout");
+    auto dl = framework::StringToDataLayout(data_layout);
+    // Some models may have intentionally set "AnyLayout" for pool
+    // op. Treat this as NCHW (default data_format value)
+    if (dl != framework::DataLayout::kAnyLayout) {
+      return framework::OpKernelType(
+          expected_kernel_type.data_type_, tensor.place(),
+          framework::StringToDataLayout(data_layout));
+    }
+  }
+#endif
+  return framework::OpKernelType(expected_kernel_type.data_type_,
+                                 tensor.place(), tensor.layout());
+}
+
 void BatchNormOpMaker::Make() {
   AddAttr<bool>("is_test",
                 "(bool, default false) Set to true for inference only, false "
@@ -446,6 +473,12 @@ framework::OpKernelType BatchNormGradOp::GetExpectedKernelType(
 #ifdef PADDLE_WITH_MKLDNN
   if (library == framework::LibraryType::kPlain &&
       platform::CanMKLDNNBeUsed(ctx)) {
+    // TODO(jczaja): Add support for NHWC
+    const std::string data_layout = ctx.Attr<std::string>("data_layout");
+    PADDLE_ENFORCE_NE(
+        data_layout, "NHWC",
+        platform::errors::Unimplemented(
+            "Batch Norm MKLDNN grad does not support NHWC data format yet"));
     library = framework::LibraryType::kMKLDNN;
     layout = framework::DataLayout::kMKLDNN;
   }
diff --git a/paddle/fluid/operators/batch_norm_op.h b/paddle/fluid/operators/batch_norm_op.h
@@ -47,6 +47,10 @@ class BatchNormOp : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext &ctx) const override;
+
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string &var_name, const Tensor &tensor,
+      const framework::OpKernelType &expected_kernel_type) const override;
 };
 
 class BatchNormGradOp : public framework::OperatorWithKernel {
diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
@@ -775,8 +775,23 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
      * ('any') which lets a primitive (conv backward in this case) choose
      * the memory format preferred for best performance
      */
-    auto chosen_memory_format = MKLDNNMemoryFormat::any;
+
+    // TODO(jczaja): Once GRAD NHWC is working then format 'any'
+    // should be used exclusively. But till forward pass enforce
+    // NCHW for training we need to have NCHW here as well
+    // to avoid performance degradation in relu_grad and pool2d_grad
+    std::string data_format = ctx.Attr<std::string>("data_format");
+    auto chosen_memory_format =
+        platform::data_format_to_memory_format(data_format);
+
     weights_format = MKLDNNMemoryFormat::any;
+    // Check the format for user's special output
+    if (chosen_memory_format != MKLDNNMemoryFormat::any) {
+      if (is_conv3d) {
+        chosen_memory_format =
+            platform::MKLDNNFormatForSize(src_tz.size(), chosen_memory_format);
+      }
+    }
 
     auto src_md = platform::MKLDNNMemDesc(
         src_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_batch_norm_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_batch_norm_mkldnn_op.py
@@ -84,6 +84,13 @@ def test_check_output(self):
         self.check_with_place(place, data_format, self.dtype, [2, 3, 4, 5])
 
 
+class TestMKLDNNBatchNormOpInference_NHWC(TestMKLDNNBatchNormOpInference):
+    def test_check_output(self):
+        place = core.CPUPlace()
+        data_format = "NHWC"
+        self.check_with_place(place, data_format, self.dtype, [2, 4, 5, 3])
+
+
 class TestMKLDNNBatchNormOpWithReluInference(TestBatchNormOpInference):
     def init_kernel_type(self):
         self.use_mkldnn = True
diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py
@@ -259,6 +259,21 @@ def check_with_place(self, place, data_layout, dtype, shape):
 
         batch_norm_op.run(scope, place)
 
+        # When op is called without Executor then
+        # MKL-DNN Tensor is returned. For NHWC data layout
+        # dims will be in NCHW order as it is MKL-DNN way
+        # of memory descripting. So we need to convert NCHW
+        # dims into NHWC.
+        if data_layout == "NHWC" and self.use_mkldnn == True:
+            # Create executor to have MKL-DNN cache 
+            # cleared after NHWC unit test
+            place = core.CPUPlace()
+            exe = fluid.Executor(place)
+            dims = y_tensor.shape()
+            c = dims.pop(1)
+            dims.append(c)
+            y_tensor._set_dims(dims)
+
         # check inference result
         self.__assert_close(
             y_tensor,