[Inductor] Set prop_kind to forward_inference when grad is not needed for mkldnn_linear_pointwise and mkldnn_convolution_pointwise (pytorch#147072)

jiayisunx · pytorchmergebot · commit 00333c454824 · 2025-03-20T06:21:31.000Z
Summary: The `prop_kind` of `mkldnn._linear_pointwise`, `mkldnn._linear_pointwise.binary`, `mkldnn._convolution_pointwise.binary` and `mkldnn._convolution_pointwise_.binary` is always `dnnl_forward` (i.e., `dnnl_forward_training`), regardless of whether `grad` is needed. Setting `prop_kind` to `dnnl_forward_inference` for these ops when `grad` is not needed can give better performance. Pull Request resolved: pytorch#147072 Approved by: https://github.com/leslie-fang-intel, https://github.com/CaoE, https://github.com/jansel
diff --git a/aten/src/ATen/native/mkldnn/Conv.cpp b/aten/src/ATen/native/mkldnn/Conv.cpp
@@ -440,6 +440,7 @@ Tensor mkldnn_convolution_pointwise_binary(
       po.append_eltwise(unary_alg, 0.f, 0.f);
     }
     op_attr.set_post_ops(po);
+    auto aprop_kind = ideep::prop_kind::forward_inference;
 
     if (bias.defined()) {
       const ideep::tensor b = itensor_from_tensor(bias);
@@ -456,7 +457,9 @@ Tensor mkldnn_convolution_pointwise_binary(
           padding_expanded,
           groups,
           /* is_channels_last */ true,
-          op_attr);
+          op_attr,
+          ideep::algorithm::convolution_direct,
+          aprop_kind);
     } else {
       ideep::convolution_forward::compute_binary(
           x,
@@ -470,7 +473,9 @@ Tensor mkldnn_convolution_pointwise_binary(
           padding_expanded,
           groups,
           /* is_channels_last */ true,
-          op_attr);
+          op_attr,
+          ideep::algorithm::convolution_direct,
+          aprop_kind);
     }
     return output;
   } else {
@@ -573,6 +578,7 @@ Tensor& mkldnn_convolution_pointwise_binary_(
     } else {
       op_attr = ideep::attr_t::fuse_sum();
     }
+    auto aprop_kind = ideep::prop_kind::forward_inference;
     _mkldnn_convolution_out(
         input_t,
         weight_t,
@@ -584,7 +590,8 @@ Tensor& mkldnn_convolution_pointwise_binary_(
         padding_expanded,
         groups,
         true,
-        op_attr);
+        op_attr,
+        aprop_kind);
   } else {
     // Fallback case, if inputs are not channels last or have different dtype,
     // OneDNN fusion may have performance regression.
diff --git a/aten/src/ATen/native/mkldnn/Linear.cpp b/aten/src/ATen/native/mkldnn/Linear.cpp
@@ -187,6 +187,14 @@ Tensor mkldnn_linear_pointwise(
     std::string_view attr,
     c10::List<std::optional<at::Scalar>> scalars,
     std::optional<std::string_view> algorithm) {
+  auto aprop_kind = ideep::prop_kind::forward;
+  bool maybe_backward = GradMode::is_enabled() &&
+      (input_t.requires_grad() || weight_t.requires_grad() ||
+       (bias_opt.has_value() && bias_opt->defined() &&
+        bias_opt->requires_grad()));
+  if (!maybe_backward) {
+    aprop_kind = ideep::prop_kind::forward_inference;
+  }
   auto input = input_t.contiguous();
   auto input_size = input.sizes();
 
@@ -238,13 +246,15 @@ Tensor mkldnn_linear_pointwise(
         w,
         mkldnn_bias.value(),
         mkldnn_output,
-        op_attr);
+        op_attr,
+        aprop_kind);
   } else {
     ideep::inner_product_forward::compute</*reorder_src=*/false, /*reorder_weight=*/false>(
         mkldnn_input,
         w,
         mkldnn_output,
-        op_attr);
+        op_attr,
+        aprop_kind);
   }
 
   if (dim != 2) {
@@ -317,6 +327,7 @@ Tensor mkldnn_linear_pointwise_binary(
 
   auto other_desc = mkldnn_other.get_desc();
   auto op_attr = ideep::attr_t::fuse_binary(it_binary->second, other_desc);
+  auto aprop_kind = ideep::prop_kind::forward_inference;
 
   if (mkldnn_bias.has_value()) {
     ideep::inner_product_forward::compute_binary</*reorder_src=*/false, /*reorder_weight=*/false>(
@@ -325,10 +336,11 @@ Tensor mkldnn_linear_pointwise_binary(
         w,
         mkldnn_bias.value(),
         mkldnn_output,
-        op_attr);
+        op_attr,
+        aprop_kind);
   } else {
     ideep::inner_product_forward::compute_binary</*reorder_src=*/false, /*reorder_weight=*/false>(
-        mkldnn_input, mkldnn_other, w, mkldnn_output, op_attr);
+        mkldnn_input, mkldnn_other, w, mkldnn_output, op_attr, aprop_kind);
   }
 
   if (dim != 2) {
diff --git a/aten/src/ATen/native/mkldnn/MKLDNNConversions.cpp b/aten/src/ATen/native/mkldnn/MKLDNNConversions.cpp
@@ -182,7 +182,7 @@ Tensor mkldnn_reorder_conv2d_weight(
       dilation_expanded,
       groups,
       ideep::algorithm::convolution_direct,
-      ideep::prop_kind::forward,
+      ideep::prop_kind::forward_inference,
       w.get_data_type(),
       src_dims,
       ideep::attr_t(),
@@ -229,7 +229,7 @@ Tensor mkldnn_reorder_conv3d_weight(
       dilation_expanded,
       groups,
       ideep::algorithm::convolution_direct,
-      ideep::prop_kind::forward,
+      ideep::prop_kind::forward_inference,
       w.get_data_type(),
       src_dims,
       ideep::attr_t(),
@@ -273,7 +273,8 @@ static Tensor mkldnn_reorder_linear_weight(
       {out_features, in_features},
       input_size,
       /* weight dtype */ dtype,
-      /* src dtype */ dtype);
+      /* src dtype */ dtype,
+      ideep::prop_kind::forward_inference);
   ideep::tensor result;
   result.init(packed_desc);
   result.feed_from(w);