
Commit 8408ee0

Merge branch 'master' into fix_pack_padded_sequence
Conflicts:
	scripts/cpu/gen-dense-cpu-ops.py
	tests/cpu/test_bf16_lazy_reorder.py
	torch_ipex/csrc/cpu/DevOPs.cpp
	torch_ipex/csrc/cpu/DevOPs.h
2 parents ad240de + 1ee3050 commit 8408ee0

6 files changed: +929 -8 lines changed


scripts/cpu/gen-dense-cpu-ops.py

Lines changed: 18 additions & 7 deletions
@@ -75,12 +75,15 @@
     'aten::_unsafe_view(Tensor self, int[] size) -> Tensor',
     'aten::native_layer_norm(Tensor input, Tensor? weight, Tensor? bias, int M, int N, float eps) -> (Tensor, Tensor, Tensor)',
     'aten::native_layer_norm_backward(Tensor grad_out, Tensor input, Tensor mean, Tensor rstd, Tensor? weight, int M, int N, bool[3] output_mask) -> (Tensor, Tensor, Tensor)',
+    # 'aten::copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)',
     'aten::_pack_padded_sequence(Tensor input, Tensor lengths, bool batch_first) -> (Tensor, Tensor)'
 ]
 
 _FN_IPEX_FUNCS_WITH_SIMPLE_ATEN_SIG = [
     'aten::index_select(Tensor self, int dim, Tensor index) -> Tensor',
+    # 'aten::copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)',
     'aten::_pack_padded_sequence(Tensor input, Tensor lengths, bool batch_first) -> (Tensor, Tensor)'
+
 ]
 
 _SHALLOW_FALLBACK_TO_CPU_TENSOR_LIST = 'shallowFallbackToCPUTensorList'
@@ -330,7 +333,10 @@ def is_out_func(fname):
         if fname.endswith('_'):
             assert len(dnnl_tensor_param_vars) > 0
             code += ' if (dbl::chk::dnnl_inplace_support_the_tensors(dnnl_input_tensors)) {\n'
-            code += ' return AtenIpexCPUDev::dil_{}({});\n'.format(fname, ', '.join(list(param_vars)))
+            if self.is_ipex_func(aten_func_sig_str):
+                code += self.gen_ipex_func_code(fname, param_vars)
+            else:
+                code += ' return AtenIpexCPUDev::dil_{}({});\n'.format(fname, ', '.join(list(param_vars)))
             code += ' }\n'  # Check support tensors
         else:
             param_seq_str_vec = []
@@ -339,12 +345,7 @@ def is_out_func(fname):
                 param_seq_str_vec.append(param_seq_str)
 
             if self.is_ipex_func(aten_func_sig_str):
-                code += ' auto _result = AtenIpexCPUDev::dil_{}({});\n'.format(fname, ', '.join(param_seq_str_vec))
-                code += ' if (is_ipex_func_success()) {\n'
-                code += ' return _result;\n'
-                code += ' } else {\n'
-                code += ' reset_ipex_func_status();\n'
-                code += ' }\n'
+                code += self.gen_ipex_func_code(fname, param_seq_str_vec)
             else:
                 code += ' if (dbl::chk::dnnl_support_the_tensors(dnnl_input_tensors)) {\n'
                 code += ' return AtenIpexCPUDev::dil_{}({});\n'.format(fname, ', '.join(param_seq_str_vec))
@@ -359,6 +360,16 @@ def is_out_func(fname):
 
         return code
 
+    def gen_ipex_func_code(self, fname, param_vars):
+        code = ''
+        code += ' auto _result = AtenIpexCPUDev::dil_{}({});\n'.format(fname, ', '.join(param_vars))
+        code += ' if (is_ipex_func_success()) {\n'
+        code += ' return _result;\n'
+        code += ' } else {\n'
+        code += ' reset_ipex_func_status();\n'
+        code += ' }\n'
+        return code
+
     def gen_fallback_prepare_code(self, cpp_sig):
         code = ''
         op_check_code = ''
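As a quick illustration of what this refactor emits, here is a minimal stand-alone Python sketch that reuses the string template from gen_ipex_func_code and prints the guard it would generate for the dil_copy_ entry point added in DevOPs.cpp. The op name and parameter list passed in are illustrative only (aten::copy_ is still commented out in the registration lists above), and the indentation of the emitted C++ is approximate:

# Minimal sketch: same template logic as gen_ipex_func_code, run standalone
# for an illustrative op/parameter list rather than the generator's own tables.
def gen_ipex_func_code(fname, param_vars):
    code = ''
    code += '  auto _result = AtenIpexCPUDev::dil_{}({});\n'.format(fname, ', '.join(param_vars))
    code += '  if (is_ipex_func_success()) {\n'
    code += '    return _result;\n'
    code += '  } else {\n'
    code += '    reset_ipex_func_status();\n'
    code += '  }\n'
    return code

if __name__ == '__main__':
    # Prints the C++ guard that would wrap a call like
    # AtenIpexCPUDev::dil_copy_(self, src, non_blocking).
    print(gen_ipex_func_code('copy_', ['self', 'src', 'non_blocking']))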

torch_ipex/csrc/cpu/DevOPs.cpp

Lines changed: 26 additions & 0 deletions
@@ -2063,5 +2063,31 @@ std::tuple<at::Tensor,at::Tensor> AtenIpexCPUDev::dil__pack_padded_sequence(cons
     std::get<1>(_ipex_result));
 }
 
+at::Tensor& AtenIpexCPUDev::dil_copy_(
+    at::Tensor & self,
+    const at::Tensor & src,
+    bool non_blocking) {
+  DEBUG("AtenIpexCPUDev::dil_copy_\n");
+  torch_ipex::reset_ipex_func_status();
+
+  IPEX_CHECK(
+    self.device().type() == c10::DeviceType::DPCPP &&
+    src.device().type() == c10::DeviceType::DPCPP,
+    "IPEX copy only works on DPCPP tensors");
+  if (ShadeDataContext::isDilTensor(src) && ShadeDataContext::isTensorMixPrecision(src)) {
+    IPEX_CHECK(check_tensor_own_whole_storage(self), "IPEX copy only works while the self tensor owns its whole storage");
+    auto dil_src = dbl::comm::try_gen_dil_tensor(src);
+    IPEX_CHECK(dil_src.get_data_type() == dil::data_type::bf16);
+    auto new_buffer_desc = dil_src.get_desc();
+    dil::tensor dil_buffer{new_buffer_desc};
+    dil_src.reorder_to(dil_buffer);
+    dbl::comm::equip_dil_buffer(self, dil_buffer);
+    return self;
+  }
+  // TODO: we need to add more LP handling here
+  torch_ipex::set_ipex_func_status(torch_ipex::IPEXFuncStatus::IPEX_FALLBACK);
+  return self;
+}
+
 } // namespace cpu
 } // namespace torch_ipex
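The fallback contract between this op and the generator change above is the key point: dil_copy_ resets the func status on entry, handles only the case where src is a bf16 mix-precision DIL tensor, and flags IPEX_FALLBACK otherwise, so the guard emitted by gen_ipex_func_code returns the IPEX result only when is_ipex_func_success() holds and otherwise resets the status and falls through to the stock CPU path. A toy Python model of that handshake (illustrative names only, not the real torch_ipex API):

# Toy model of the IPEX func-status handshake between the generated wrapper
# and dil_copy_; the names here are illustrative, not real torch_ipex symbols.
_SUCCESS, _FALLBACK = 'success', 'fallback'
_status = _SUCCESS

def reset_ipex_func_status():
    global _status
    _status = _SUCCESS

def set_ipex_func_status(status):
    global _status
    _status = status

def is_ipex_func_success():
    return _status == _SUCCESS

def dil_copy_(src_is_mix_precision_dil):
    # Mirrors dil_copy_: handle the bf16 DIL source, otherwise flag a fallback.
    reset_ipex_func_status()
    if src_is_mix_precision_dil:
        return 'copied via dil reorder'
    set_ipex_func_status(_FALLBACK)
    return None

def generated_copy_wrapper(src_is_mix_precision_dil):
    # Mirrors the guard emitted by gen_ipex_func_code.
    _result = dil_copy_(src_is_mix_precision_dil)
    if is_ipex_func_success():
        return _result
    reset_ipex_func_status()
    return 'copied via the stock CPU copy_'

print(generated_copy_wrapper(True))   # copied via dil reorder
print(generated_copy_wrapper(False))  # copied via the stock CPU copy_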

torch_ipex/csrc/cpu/DevOPs.h

Lines changed: 2 additions & 0 deletions
@@ -83,6 +83,8 @@ class AtenIpexCPUDev {
   static at::Tensor dil__unsafe_view(const at::Tensor & self, at::IntArrayRef size);
   static at::Tensor dil_shuffle(const at::Tensor & self, at::IntArrayRef view_shape, int64_t dim0, int64_t dim1);
   static std::tuple<at::Tensor,at::Tensor> dil__pack_padded_sequence(const at::Tensor & input, const at::Tensor & lengths, bool batch_first);
+  static at::Tensor& dil_copy_(at::Tensor & self, const at::Tensor & src, bool non_blocking);
+
 };
 
 } // namespace cpu

torch_ipex/csrc/cpu/bf16/Bridge.cpp

Lines changed: 3 additions & 0 deletions
@@ -12,6 +12,9 @@ namespace bf16 {
 
 at::Tensor gen_consistent_tensor(const at::Tensor & self) {
   // Reorder dil buffer to public because aten tensor does not support blocked format
+  if (!ShadeDataContext::isDilTensor(self)) {
+    return bridge::shallowFallbackToCPUTensor(self);
+  }
   dbl::comm::reorder_to_public(self, /*keep data type*/true);
 
   dil::tensor& self_dil_storage = ShadeDataContext::getDilStorage(self);

torch_ipex/csrc/cpu/bf16/DevOPs.cpp

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ namespace bf16 {
 
 at::Tensor index_select(const at::Tensor & self, int64_t dim, const at::Tensor & index) {
   auto&& _tensor = bf16::gen_consistent_tensor(self);
-  auto&& _ipex_index = bridge::shallowFallbackToCPUTensor(index);
+  auto&& _ipex_index = bf16::gen_consistent_tensor(index);
   auto&& _ipex_result = at::index_select(_tensor, dim, _ipex_index);
   return bf16::gen_mix_prec_tensor(_ipex_result);
 }
