
Commit 03e161c

Enable shuffle operator

1 parent 363dc39 commit 03e161c

File tree

6 files changed: +133 -2 lines


torch_ipex/csrc/cpu/DevOPs.cpp

Lines changed: 16 additions & 0 deletions
@@ -6,6 +6,8 @@
 #include <ATen/NamedTensorUtils.h>
 #include <c10/util/Exception.h>
 #include <c10/util/Logging.h>
+#include <torch/csrc/autograd/function.h>
+#include <torch/csrc/autograd/record_function.h>

 #include <limits>

@@ -2021,5 +2023,19 @@ at::Tensor AtenIpexCPUDev::dil_index_select(
   return at::Tensor();
 }

+at::Tensor AtenIpexCPUDev::dil_shuffle(const at::Tensor & self, at::IntArrayRef view_shape, int64_t dim0, int64_t dim1) {
+  DEBUG("AtenIpexCPUDev::dil_shuffle\n");
+  RECORD_FUNCTION("AtenIpexCPUDev::dil_shuffle", std::vector<c10::IValue>(), -1);
+  // NOTE: We do NOT add sanity checks here because PyTorch has no shuffle operator. This dil operator exists only
+  // for fusion, and the fusion logic performs the sanity checks. Some models use view + transpose + view to
+  // implement shuffle semantics, so IPEX fuses those operators into a single shuffle.
+  dil::tensor&& x = dbl::comm::try_gen_dil_tensor(self);
+  dil::tensor y;
+  auto group_dim = dim0 < dim1 ? dim0 : dim1;
+  auto groups = view_shape[group_dim];
+  dil::channel_shuffle_forward::compute(std::move(x), y, groups, group_dim);
+  return dbl::comm::gen_aten_tensor_by(std::move(y));
+}
+
 } // namespace cpu
 } // namespace torch_ipex
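For context, the view + transpose + contiguous + view sequence that dil_shuffle collapses is the standard PyTorch channel-shuffle idiom. A minimal sketch of that idiom (the tensor sizes and groups value below are illustrative, not taken from this commit):

import torch

def channel_shuffle(x: torch.Tensor, groups: int) -> torch.Tensor:
    n, c, h, w = x.shape
    x = x.view(n, groups, c // groups, h, w)   # view_shape = [n, groups, c // groups, h, w]
    x = x.transpose(1, 2).contiguous()         # trans_dim0 = 1, trans_dim1 = 2
    return x.view(n, -1, h, w)                 # flattern_shape = [n, -1, h, w]

x = torch.randn(2, 8, 4, 4)
print(channel_shuffle(x, groups=2).shape)      # torch.Size([2, 8, 4, 4])

With trans_dim0 = 1 and trans_dim1 = 2, dil_shuffle derives group_dim = min(1, 2) = 1 and groups = view_shape[1], i.e. the same groups the first view introduced.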

torch_ipex/csrc/cpu/DevOPs.h

Lines changed: 1 addition & 0 deletions
@@ -81,6 +81,7 @@ class AtenIpexCPUDev {
   static at::Tensor dil_view(const at::Tensor & self, at::IntArrayRef size);
   static at::Tensor dil_index_select(const at::Tensor & self, int64_t dim, const at::Tensor & index);
   static at::Tensor dil__unsafe_view(const at::Tensor & self, at::IntArrayRef size);
+  static at::Tensor dil_shuffle(const at::Tensor & self, at::IntArrayRef view_shape, int64_t dim0, int64_t dim1);
 };

 } // namespace cpu

torch_ipex/csrc/jit/fusion_pass.cpp

Lines changed: 3 additions & 0 deletions
@@ -308,6 +308,9 @@ void FusionPass(std::shared_ptr<Graph> &graph) {
   // Fuse conv with eltwise operator
   graph_rewrite::FuseConvolutionWithEltwise(graph);

+  // Fuse operators as shuffle
+  graph_rewrite::FuseShuffle(graph);
+
   // Pattern based fusion was lack of alias analysis
   // ??? It may either be too conservative or too aggressive ???
   // getSubgraphRewriter().runOnGraph(graph);

torch_ipex/csrc/jit/graph_rewrite.cpp

Lines changed: 90 additions & 0 deletions
@@ -52,6 +52,96 @@ std::unordered_map<std::string, c10::IValue> getConvParams(
   return calc_values;
 }

+void FuseShuffle(std::shared_ptr<Graph>& graph) {
+  std::string shuffle = R"(
+      graph(%input, %view_shape:int[], %trans_dim0:int, %trans_dim1:int, %mem_format:int, %flattern_shape:int[]):
+        %r = aten::view(%input, %view_shape)
+        %r = aten::transpose(%r, %trans_dim0, %trans_dim1)
+        %r = aten::contiguous(%r, %mem_format)
+        %r = aten::view(%r, %flattern_shape)
+        return (%r) )";
+
+  std::string shuffle_2d_fusion = R"(
+      graph(%input, %view_shape:int[], %trans_dim0:int, %trans_dim1:int, %mem_format:int, %flattern_shape:int[]):
+        %r = ipex::shuffle_2d(%input, %view_shape, %trans_dim0, %trans_dim1)
+        return (%r) )";
+
+  auto filter_shuffle_2d_fusion = [] (
+      const Match& match,
+      const std::unordered_map<std::string, Value*>& vmap) {
+    const auto& match_vmap = match.values_map;
+    auto input_ = getIValue("input", match_vmap, vmap).value();
+    if (!(input_.isTensor())) {
+      return false;
+    }
+    auto view_shape_ = getIValue("view_shape", match_vmap, vmap).value();
+    if (!(view_shape_.isIntList())) {
+      return false;
+    }
+    auto trans_dim0_ = getIValue("trans_dim0", match_vmap, vmap).value();
+    if (!(trans_dim0_.isInt())) {
+      return false;
+    }
+    auto trans_dim1_ = getIValue("trans_dim1", match_vmap, vmap).value();
+    if (!(trans_dim1_.isInt())) {
+      return false;
+    }
+    auto flattern_shape_ = getIValue("flattern_shape", match_vmap, vmap).value();
+    if (!(flattern_shape_.isIntList())) {
+      return false;
+    }
+
+    auto trans_dim0_val = trans_dim0_.toInt();
+    auto trans_dim1_val = trans_dim1_.toInt();
+    auto dim0_val = trans_dim0_val < trans_dim1_val ? trans_dim0_val : trans_dim1_val;
+    auto dim1_val = trans_dim0_val > trans_dim1_val ? trans_dim0_val : trans_dim1_val;
+    // Bail out if the transpose is not between the two group dims,
+    // e.g. [n, c1, c2, h, w] => [n, c2, c1, h, w]
+    if ((dim1_val - dim0_val) != 1) {
+      return false;
+    }
+
+    auto input_val = input_.toTensor();
+    auto view_shape_val = view_shape_.toIntVector();
+    auto flattern_shape_val = flattern_shape_.toIntVector();
+    // The view must split exactly one dim, e.g. [n, c, h, w] => [n, groups, c // groups, h, w]
+    if ((input_val.ndimension() - view_shape_val.size()) != -1) {
+      return false;
+    }
+
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(dim0_val >= 0);
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(dim1_val >= 0);
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(dim0_val + 1 < input_val.ndimension());
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(dim1_val + 1 < input_val.ndimension());
+    if (view_shape_val[dim0_val] * view_shape_val[dim1_val] != input_val.size(dim0_val)) {
+      return false;
+    }
+
+    if (flattern_shape_val.size() != input_val.ndimension()) {
+      return false;
+    }
+
+    for (int i = 0; i < flattern_shape_val.size(); i++) {
+      if (flattern_shape_val[i] != input_val.size(i)) {
+        // [n, c, h, w] => view [n, groups, c // groups, h, w] => transpose [n, c // groups, groups, h, w]
+        //              => view [n, -1, h, w]
+        // or
+        //              => view [n, c, h, w]
+        if ((flattern_shape_val[i] != -1) || (i != dim0_val)) {
+          return false;
+        }
+      }
+    }
+
+    return true;
+  };
+
+  SubgraphRewriter rewriter_shuffle_2d;
+  rewriter_shuffle_2d.RegisterRewritePattern(
+      shuffle,
+      shuffle_2d_fusion);
+  rewriter_shuffle_2d.runOnGraph(graph, filter_shuffle_2d_fusion);
+}
+
 void FuseConvolutionWithEltwise(std::shared_ptr<Graph>& graph) {
   std::string conv2d_swish_fusion = R"(
     graph(%a, %w, %b, %stride:int[], %padding:int[], %dilation:int[], %groups:int):
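The filter's shape checks are easier to follow outside the C++. A Python paraphrase of filter_shuffle_2d_fusion (an illustrative sketch only; plain lists stand in for the matched IValues, and the debug-only asserts are omitted):

def is_shuffle_2d(input_shape, view_shape, trans_dim0, trans_dim1, flattern_shape):
    dim0, dim1 = sorted((trans_dim0, trans_dim1))
    if dim1 - dim0 != 1:                          # transpose must swap two adjacent dims (the group split)
        return False
    if len(view_shape) != len(input_shape) + 1:   # view must split one dim: [n,c,h,w] -> [n,g,c//g,h,w]
        return False
    if view_shape[dim0] * view_shape[dim1] != input_shape[dim0]:
        return False                              # the split must multiply back to the original dim
    if len(flattern_shape) != len(input_shape):   # final view must restore the input rank
        return False
    for i, s in enumerate(flattern_shape):
        if s != input_shape[i] and not (s == -1 and i == dim0):
            return False                          # only the split dim may differ, and only as -1
    return True

assert is_shuffle_2d([2, 8, 4, 4], [2, 2, 4, 4, 4], 1, 2, [2, -1, 4, 4])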

torch_ipex/csrc/jit/graph_rewrite.h

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ c10::optional<IValue> getIValue(
     const std::unordered_map<std::string, Value*>& vmap);
 void replaceConvolutionWithAtenConv(std::shared_ptr<Graph>& graph);
 void FuseConvolutionWithEltwise(std::shared_ptr<Graph>& graph);
+void FuseShuffle(std::shared_ptr<Graph>& graph);

 } // namespace graph_rewrite_helper
 } // namespace jit

torch_ipex/csrc/jit/register_dnnl_jit_ops.cpp

Lines changed: 22 additions & 2 deletions
@@ -4,8 +4,8 @@
 #include <torch/csrc/jit/runtime/custom_operator.h>

 #include "torch_ipex/csrc/utils.h"
-#include "cpu/FusionOPs.h"
-
+#include "torch_ipex/csrc/cpu/FusionOPs.h"
+#include "torch_ipex/csrc/cpu/DevOPs.h"

 namespace torch {
 namespace jit {
@@ -24,6 +24,26 @@ at::Tensor toOptionalTensor(const IValue& v) {
 using namespace torch_ipex::cpu;

 RegisterOperators op({
+    Operator(
+      "ipex::shuffle_2d(Tensor input, int[5] view_shape, int trans_dim0, int trans_dim1) -> Tensor",
+      [] (const Node* node) ->Operation {
+        if (torch_ipex::check_auto_dnnl()) {
+          return [] (Stack& stack) {
+            auto result = AtenIpexCPUDev::dil_shuffle(
+                (std::move(peek(stack, 0, 4))).toTensor(),
+                (std::move(peek(stack, 1, 4))).toIntVector(),
+                (std::move(peek(stack, 2, 4))).toInt(),
+                (std::move(peek(stack, 3, 4))).toInt());
+            drop(stack, 4);
+            pack(stack, std::move(result));
+            return 0;
+          };
+        } else {
+          TORCH_CHECK(false, "The PyTorch native path does not support shuffle fusion for the 2d case");
+        }
+      },
+      aliasAnalysisFromSchema()
+    ),
     Operator(
       "ipex::conv2d_relu(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1, int groups=1) -> Tensor",
       [] (const Node* node) ->Operation {
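With the operator registered, a scripted model containing the idiom should end up with an ipex::shuffle_2d node once the fusion pass runs. A hypothetical smoke test (the package name intel_pytorch_extension and ipex.DEVICE follow the conventions of IPEX releases from this era; they are assumptions here, not part of the commit):

import torch
import intel_pytorch_extension as ipex  # assumed package name for this IPEX generation

class Shuffle(torch.nn.Module):
    def forward(self, x):
        n, c, h, w = x.shape
        y = x.view(n, 2, c // 2, h, w).transpose(1, 2).contiguous()
        return y.view(n, -1, h, w)

with torch.no_grad():
    # assumes the auto-DNNL path is enabled, so check_auto_dnnl() is true at dispatch time
    model = torch.jit.script(Shuffle().eval()).to(ipex.DEVICE)  # ipex.DEVICE is an assumption
    x = torch.randn(2, 8, 4, 4).to(ipex.DEVICE)
    model(x)                   # warm-up run so the JIT applies FusionPass
    print(model.graph_for(x))  # expect ipex::shuffle_2d in place of view/transpose/view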
