Commit e29a7a4 (1 parent: a302c94)

modify dil OPs (#68)

* modify dil OPs
* prepack weight of convolution_relu and convolution_inplace_fusion

File tree

12 files changed: +101 -122 lines changed

intel_pytorch_extension_py/ops/jit_script.py

Lines changed: 0 additions & 2 deletions

@@ -18,8 +18,6 @@ def script_(obj, optimize=None, _frames_up=0, _rcb=None):
     jit_m = wrap_cpp_module(torch._C._jit_pass_fold_convbn(jit_m._c))
     core.enable_auto_dnnl()
 
-    jit_m = wrap_cpp_module(core._jit_prepack_conv_weight(jit_m._c))
-
     return jit_m
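Context for this removal: the commit moves convolution weight prepacking out of the JIT pass and into the eager convolution op itself, as the DevOPs.cpp hunk below shows. A comment-only sketch of the new call order inside dil_convolution, using names taken from that hunk (dil and dbl are IPEX-internal namespaces, so this is not standalone code):

    // inside AtenIpexCPUDev::dil_convolution, per the DevOPs.cpp hunk below:
    dbl::comm::reorder_to_bf16_for_mix_prec(weight);   // bf16 reorder happens first
    dbl::conv::prepack_conv_weights(input, dil_input,  // then pack weight for this input shape
        weight, stride, padding, dilation, groups);
    dil_weight = dbl::comm::try_gen_dil_tensor(weight);  // packed weight is consumed here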

intel_pytorch_extension_py/ops/module.py

Lines changed: 0 additions & 26 deletions
This file was deleted.

scripts/cpu/gen-dense-cpu-ops.py

Lines changed: 5 additions & 5 deletions

@@ -22,9 +22,9 @@
 ]
 
 _FN_DNNL_FUNCS_WITH_SIMPLE_ATEN_SIG = [
-    # 'aten::add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor',
-    # 'aten::add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)',
-    # 'aten::add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)',
+    'aten::add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor',
+    'aten::add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)',
+    'aten::add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)',
     'aten::mul.Tensor(Tensor self, Tensor other) -> Tensor',
     'aten::mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)',
     'aten::mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)',
@@ -66,8 +66,8 @@
     'aten::clone(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor',
     'aten::gelu(Tensor self) -> Tensor',
     'aten::gelu_backward(Tensor grad, Tensor self) -> Tensor',
-    'aten::native_layer_norm(Tensor input, Tensor? weight, Tensor? bias, int M, int N, float eps) -> (Tensor, Tensor, Tensor)',
-    'aten::native_layer_norm_backward(Tensor grad_out, Tensor input, Tensor mean, Tensor rstd, Tensor? weight, int M, int N, bool[3] output_mask) -> (Tensor, Tensor, Tensor)'
+    # 'aten::native_layer_norm(Tensor input, Tensor? weight, Tensor? bias, int M, int N, float eps) -> (Tensor, Tensor, Tensor)',
+    # 'aten::native_layer_norm_backward(Tensor grad_out, Tensor input, Tensor mean, Tensor rstd, Tensor? weight, int M, int N, bool[3] output_mask) -> (Tensor, Tensor, Tensor)'
 ]
 
 _SHALLOW_FALLBACK_TO_CPU_TENSOR_LIST = 'shallowFallbackToCPUTensorList'

tests/cpu/test_bf16_lazy_reorder.py

Lines changed: 0 additions & 20 deletions

@@ -119,26 +119,6 @@ def test_batch_norm3d(self):
 
         self.assertEqual(res_man_bf16.float(), res_auto_mix)
 
-class TestLayerNorm(TestCase):
-    def test_layer_norm(self):
-        rand_seed = int(get_rand_seed())
-        print("{} rand sed: {}".format(sys._getframe().f_code.co_name, rand_seed))
-        torch.manual_seed(rand_seed)
-
-        x_fp32 = torch.randn(2, 5, 10, 10, dtype=torch.float32, device=device)
-        x_bf16 = x_fp32.to(torch.bfloat16)
-
-        m = torch.nn.LayerNorm([10, 10])
-        m_man_bf16 =copy.deepcopy(m).to(device=device)
-        m_auto_mix =copy.deepcopy(m).to(device=device)
-
-        res_fp32 = m(x_fp32)
-
-        with AutoDNNL(True), AutoMixPrecision(False):
-            res_man_bf16 = m_man_bf16(x_bf16)
-            self.assertEqual(res_man_bf16.dtype, torch.bfloat16)
-            self.assertEqual(res_fp32.bfloat16().float(), res_man_bf16, 2e-2)
-
 class TestRelu(TestCase):
     def test_relu(self):
         rand_seed = int(get_rand_seed())

tests/cpu/test_jit.py

Lines changed: 1 addition & 2 deletions

@@ -259,8 +259,7 @@ def _test_output_bf16(self, model, x, kind=None, prec=None):
         core.enable_auto_dnnl()
 
         core.enable_mix_bf16_fp32()
-        # prepack convolution weight, weight will be a bf16 tensor
-        fused_model = wrap_cpp_module(core._jit_prepack_conv_weight(fused_model._c))
+
         with torch.no_grad():
             # bf16, native path
             result = model(x)

torch_ipex/csrc/cpu/CustomOPs.h

Lines changed: 9 additions & 9 deletions

@@ -18,7 +18,7 @@ class NewLinearOp : public torch::autograd::Function<NewLinearOp> {
       at::Tensor bias = at::Tensor()) {
     ctx->save_for_backward({input, weight, bias});
     if (torch_ipex::check_auto_dnnl() && input.device().type() == c10::DeviceType::DPCPP) {
-      return torch_ipex::cpu::AtenIpexCPUDev::dil_linear(input.is_contiguous() ? input : input.contiguous(), weight, bias);
+      return torch_ipex::cpu::AtenIpexCPUDev::dil_linear(input.is_contiguous() ? input : input.contiguous(), weight.is_contiguous() ? weight : weight.contiguous(), bias.is_contiguous() ? bias : bias.contiguous());
     } else {
       return at::linear(input, weight, bias);
     }
@@ -38,9 +38,9 @@ class NewLinearOp : public torch::autograd::Function<NewLinearOp> {
 
     if (torch_ipex::check_auto_dnnl() && input.device().type() == c10::DeviceType::DPCPP) {
       grad_input = torch_ipex::cpu::AtenIpexCPUDev::dil_linear_backward_input(
-          input.sizes(), grad_output.contiguous(), weight);
+          input.sizes(), grad_output.is_contiguous() ? grad_output : grad_output.contiguous(), weight.is_contiguous() ? weight : weight.contiguous());
       std::tie(grad_weight, grad_bias) = torch_ipex::cpu::AtenIpexCPUDev::dil_linear_backward_weights(
-          grad_output.contiguous(), input, weight, bias.defined());
+          grad_output.is_contiguous() ? grad_output : grad_output.contiguous(), input.is_contiguous() ? input : input.contiguous(), weight.is_contiguous() ? weight : weight.contiguous(), bias.defined());
     } else {
       grad_input = grad_output.mm(weight);
       grad_weight = grad_output.t().mm(input);
@@ -69,7 +69,7 @@ class NewMaxPoolingOp : public torch::autograd::Function<NewMaxPoolingOp> {
     ctx->saved_data["ceil_mode"] = ceil_mode;
 
     if (torch_ipex::check_auto_dnnl() && input.device().type() == c10::DeviceType::DPCPP) {
-      at::Tensor output = torch_ipex::cpu::AtenIpexCPUDev::dil_max_pooling(input, kernel_size, stride,
+      at::Tensor output = torch_ipex::cpu::AtenIpexCPUDev::dil_max_pooling(input.is_contiguous() ? input : input.contiguous(), kernel_size, stride,
           padding, dilation, ceil_mode);
       ctx->save_for_backward({input, output});
       return output;
@@ -88,7 +88,7 @@ class NewMaxPoolingOp : public torch::autograd::Function<NewMaxPoolingOp> {
     at::Tensor input = saved[0];
     at::Tensor indices = saved[1];
 
-    at::Tensor grad_output = grad_outputs[0].contiguous();
+    at::Tensor grad_output = grad_outputs[0];
     at::Tensor grad_input;
 
     std::vector<int64_t> kernel_size = ctx->saved_data["kernel_size"].toIntVector();
@@ -99,7 +99,7 @@ class NewMaxPoolingOp : public torch::autograd::Function<NewMaxPoolingOp> {
 
     if (torch_ipex::check_auto_dnnl() && input.device().type() == c10::DeviceType::DPCPP) {
       grad_input = torch_ipex::cpu::AtenIpexCPUDev::dil_max_pooling_backward(
-          grad_output, indices, input, kernel_size, stride, padding, dilation, ceil_mode);
+          grad_output.is_contiguous() ? grad_output : grad_output.contiguous(), indices.is_contiguous() ? indices : indices.contiguous(), input.is_contiguous() ? input : input.contiguous(), kernel_size, stride, padding, dilation, ceil_mode);
     } else {
       grad_input = at::max_pool2d_with_indices_backward(grad_output, input, kernel_size,
           stride, padding, dilation, ceil_mode, indices);
@@ -118,7 +118,7 @@ class NewApaptiveAvgPoolingOp : public torch::autograd::Function<NewApaptiveAvgPoolingOp> {
 
     at::Tensor output;
     if (torch_ipex::check_auto_dnnl() && input.device().type() == c10::DeviceType::DPCPP) {
-      output = torch_ipex::cpu::AtenIpexCPUDev::dil_adaptive_avg_pool2d(input, output_size);
+      output = torch_ipex::cpu::AtenIpexCPUDev::dil_adaptive_avg_pool2d(input.is_contiguous() ? input : input.contiguous(), output_size);
     } else {
       output = at::_adaptive_avg_pool2d(input, output_size);
     }
@@ -131,11 +131,11 @@ class NewApaptiveAvgPoolingOp : public torch::autograd::Function<NewApaptiveAvgPoolingOp> {
     auto saved = ctx->get_saved_variables();
     at::Tensor input = saved[0];
 
-    at::Tensor grad_output = grad_outputs[0].contiguous();
+    at::Tensor grad_output = grad_outputs[0];
     at::Tensor grad_input;
 
     if (torch_ipex::check_auto_dnnl() && input.device().type() == c10::DeviceType::DPCPP) {
-      grad_input = torch_ipex::cpu::AtenIpexCPUDev::dil_adaptive_avg_pool2d_backward(grad_output, input);
+      grad_input = torch_ipex::cpu::AtenIpexCPUDev::dil_adaptive_avg_pool2d_backward(grad_output.is_contiguous() ? grad_output : grad_output.contiguous(), input.is_contiguous() ? input : input.contiguous());
     } else {
       grad_input = at::_adaptive_avg_pool2d_backward(grad_output, input);
     }
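The recurring `t.is_contiguous() ? t : t.contiguous()` guard above is the heart of this file's change. A minimal, self-contained sketch of the pattern against plain ATen; the helper name to_contiguous_if_needed is ours, not the extension's:

    #include <ATen/ATen.h>
    #include <cstdio>

    // Hypothetical helper showing the guard this commit applies at every DNNL
    // entry point. On an already-contiguous tensor, contiguous() returns the
    // tensor itself, so the explicit test mainly documents intent and skips a
    // dispatcher call.
    static at::Tensor to_contiguous_if_needed(const at::Tensor& t) {
      return t.is_contiguous() ? t : t.contiguous();
    }

    int main() {
      at::Tensor a = at::randn({4, 8});
      at::Tensor b = a.t();                       // transposed view: not contiguous
      at::Tensor c = to_contiguous_if_needed(b);  // materializes a dense copy
      std::printf("%d %d\n", (int)b.is_contiguous(), (int)c.is_contiguous());  // 0 1
      return 0;
    }

Note also that the backward passes now keep grad_outputs[0] as-is and guard only the arguments handed to the dil_* kernels, so the native ATen fallback branch no longer pays for an unconditional contiguous() call.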

torch_ipex/csrc/cpu/DevOPs.cpp

Lines changed: 20 additions & 44 deletions

@@ -58,34 +58,9 @@ at::Tensor AtenIpexCPUDev::dil_convolution(
     dil_bias = dbl::comm::try_gen_dil_tensor(bias);
   }
 
-  // Prepack weight tensor if it's either a *cpu tensor* or a *plain dil tensor*
-  //
-  // Note: weight tensor will not be re-packed unless user has implicitly
-  //       triggered `to_public` by accessing its data
-  //       One caveat is when the input size has changed and prepacked weight
-  //       might not be the best fit for new input size, the weight will not
-  //       be re-packed in such cases, but it still ensures the correctness
-  //
-  // TODO: once semantics of "own shade context" is equivalent to
-  //       "is dil tensor", we could remove the first check below
   dbl::comm::reorder_to_bf16_for_mix_prec(weight);
-  if (!check_tensor_own_shade_context(weight) ||
-      !cpu::ShadeDataContext::isDilOwnTheTensor(weight) ||
-      cpu::ShadeDataContext::getDilTensor(weight).is_public_format()) {
-    auto packed_desc = dil::convolution_forward::expected_weights_desc(
-      weight.sizes().vec(),
-      dil_input.get_data_type(),
-      stride.vec(),
-      padding.vec(),
-      padding.vec(),
-      dilation.vec(),
-      groups,
-      dil::algorithm::convolution_direct,
-      dil::prop_kind::forward,
-      dil_input.get_data_type(),
-      input.sizes().vec());
-    dbl::comm::reorder_to_desc(weight, packed_desc);
-  }
+  dbl::conv::prepack_conv_weights(input, dil_input,
+      weight, stride, padding, dilation, groups);
   dil_weight = dbl::comm::try_gen_dil_tensor(weight);
 
   dil::tensor dil_output = dbl::conv::conv2d_impl(
@@ -133,7 +108,8 @@ std::tuple<at::Tensor, at::Tensor> dil_convolution_backward_weights(
   const dil::tensor dil_input = dbl::comm::try_gen_dil_tensor(input);
 
   dil::tensor dil_grad_weight, dil_grad_bias;
-  auto diff_weight_type = get_dil_data_type(weight.scalar_type());
+  dil::tensor w = dbl::comm::try_gen_dil_tensor(weight);
+  auto diff_weight_type = w.get_data_type();
   auto weight_size = weight.sizes();
 
   if (bias_defined) {
@@ -176,7 +152,8 @@ std::tuple<at::Tensor,at::Tensor,at::Tensor> AtenIpexCPUDev::dil_convolution_backward(
 {
   DEBUG("AtenIpexCPUDev::dil_convolution_backward\n");
   at::Tensor grad_output = grad_output_t.is_contiguous() ? grad_output_t : grad_output_t.contiguous();
-
+  CHECK_DNNL_OP_PRE_COND(input);
+  CHECK_DNNL_OP_PRE_COND(weight);
   dbl::comm::reorder_to_bf16_for_mix_prec(input);
   dbl::comm::reorder_to_bf16_for_mix_prec(grad_output);
   dbl::comm::reorder_to_bf16_for_mix_prec(weight);
@@ -667,7 +644,7 @@ at::Tensor AtenIpexCPUDev::dil_linear(
   dbl::comm::reorder_to_bf16_for_mix_prec(weight);
 
   // reshape first if input dim is greater than 2 and the reshape will cost a memory copy.
-  auto self_reshaped = self.dim() > 2 ? self.reshape({-1, self.size(self.dim() - 1)}) : self;
+  auto self_reshaped = self.dim() > 2 ? dil_reshape(self, {-1, self.size(self.dim() - 1)}) : self;
   const dil::tensor x = dbl::comm::try_gen_dil_tensor(self_reshaped);
   const dil::tensor w = dbl::comm::try_gen_dil_tensor(weight);
 
@@ -704,7 +681,7 @@ at::Tensor AtenIpexCPUDev::dil_linear_fuse_relu(
   dbl::comm::reorder_to_bf16_for_mix_prec(weight);
 
   // reshape first if input dim is greater than 2 and the reshape will cost a memory copy.
-  auto self_reshaped = self.dim() > 2 ? self.reshape({-1, self.size(self.dim() - 1)}) : self;
+  auto self_reshaped = self.dim() > 2 ? dil_reshape(self, {-1, self.size(self.dim() - 1)}) : self;
   const dil::tensor x = dbl::comm::try_gen_dil_tensor(self_reshaped);
   const dil::tensor w = dbl::comm::try_gen_dil_tensor(weight);
 
@@ -740,11 +717,13 @@ at::Tensor AtenIpexCPUDev::dil_linear_backward_input(
     at::IntArrayRef input_size, const at::Tensor& grad_output, const at::Tensor& weight){
   DEBUG("AtenIpexCPUDev::dil_linear_backward_input\n");
 
+  CHECK_DNNL_OP_PRE_COND(grad_output);
+  CHECK_DNNL_OP_PRE_COND(weight);
   dbl::comm::reorder_to_bf16_for_mix_prec(grad_output);
   dbl::comm::reorder_to_bf16_for_mix_prec(weight);
 
   auto grad_output_reshaped = grad_output.dim() > 2 ?
-    grad_output.reshape({-1, grad_output.size(grad_output.dim() - 1)}) : grad_output;
+    dil_reshape(grad_output, {-1, grad_output.size(grad_output.dim() - 1)}) : grad_output;
   dil::tensor grady = dbl::comm::try_gen_dil_tensor(grad_output_reshaped);
   const dil::tensor w = dbl::comm::try_gen_dil_tensor(weight);
 
@@ -766,17 +745,22 @@ std::tuple<at::Tensor, at::Tensor> AtenIpexCPUDev::dil_linear_backward_weights(
     const at::Tensor& grad_output, const at::Tensor& input, const at::Tensor& weight, bool bias_defined) {
   DEBUG("AtenIpexCPUDev::dil_linear_backward_weights\n");
 
+  CHECK_DNNL_OP_PRE_COND(input);
+  CHECK_DNNL_OP_PRE_COND(grad_output);
+  CHECK_DNNL_OP_PRE_COND(weight);
   dbl::comm::reorder_to_bf16_for_mix_prec(grad_output);
   dbl::comm::reorder_to_bf16_for_mix_prec(input);
   dbl::comm::reorder_to_bf16_for_mix_prec(weight);
 
   auto grad_output_reshaped = grad_output.dim() > 2 ?
-    grad_output.reshape({-1, grad_output.size(grad_output.dim() - 1)}) : grad_output;
-  auto input_reshaped = input.dim() > 2 ? input.reshape({-1, input.size(input.dim() - 1)}) : input;
+    dil_reshape(grad_output, {-1, grad_output.size(grad_output.dim() - 1)}) : grad_output;
+  auto input_reshaped = input.dim() > 2 ? dil_reshape(input, {-1, input.size(input.dim() - 1)}) : input;
 
   dil::tensor grady = dbl::comm::try_gen_dil_tensor(grad_output_reshaped);
   dil::tensor x = dbl::comm::try_gen_dil_tensor(input_reshaped);
-  auto diff_weight_type = get_dil_data_type(weight.scalar_type());
+  dil::tensor w = dbl::comm::try_gen_dil_tensor(weight);
+  auto diff_weight_type = w.get_data_type();
+
   dil::tensor gradw, gradb;
   if (bias_defined) {
     dil::inner_product_backward_weights::compute(x, grady, gradw, gradb, diff_weight_type);
@@ -795,13 +779,6 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> AtenIpexCPUDev::dil_linear_backward(
     const at::Tensor& input, const at::Tensor& grad_output,
     const at::Tensor& weight, std::array<bool,3> output_mask) {
   DEBUG("AtenIpexCPUDev::dil_linear_backward\n");
-  CHECK_DNNL_OP_PRE_COND(input);
-  CHECK_DNNL_OP_PRE_COND(grad_output);
-  CHECK_DNNL_OP_PRE_COND(weight);
-
-  dbl::comm::reorder_to_bf16_for_mix_prec(grad_output);
-  dbl::comm::reorder_to_bf16_for_mix_prec(input);
-  dbl::comm::reorder_to_bf16_for_mix_prec(weight);
 
   at::Tensor grad_input, grad_weight, grad_bias;
   if (output_mask[0]) {
@@ -1304,10 +1281,9 @@ at::Tensor AtenIpexCPUDev::dil__softmax_backward_data(
 
 at::Tensor AtenIpexCPUDev::dil_sigmoid(const at::Tensor& self) {
   DEBUG("AtenIpexCPUDev::dil_sigmoid\n");
-
+  CHECK_DNNL_OP_PRE_COND(self);
   dbl::comm::reorder_to_bf16_for_mix_prec(self);
 
-  CHECK_DNNL_OP_PRE_COND(self);
   dil::tensor x = dbl::comm::try_gen_dil_tensor(self);
   dil::tensor y;
   dil::eltwise_forward::compute(
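The inline prepack logic deleted from dil_convolution above is what now sits behind the new dbl::conv::prepack_conv_weights call (and, per the commit title, is also reused by convolution_relu and convolution_inplace_fusion). A plausible body for that helper, reassembled purely from the deleted lines; the real signature and checks in the extension may differ:

    // Hedged sketch: the deleted inline code refactored into the helper form
    // this commit calls. dil, dbl and cpu are IPEX-internal namespaces, so
    // this is not standalone code.
    void prepack_conv_weights(
        const at::Tensor& input, const dil::tensor& dil_input,
        const at::Tensor& weight, at::IntArrayRef stride,
        at::IntArrayRef padding, at::IntArrayRef dilation, int64_t groups) {
      // Prepack only when weight is still a cpu tensor or a plain dil tensor;
      // an already-packed weight is left alone even if the input size changed,
      // which stays correct at some potential cost in performance.
      if (!check_tensor_own_shade_context(weight) ||
          !cpu::ShadeDataContext::isDilOwnTheTensor(weight) ||
          cpu::ShadeDataContext::getDilTensor(weight).is_public_format()) {
        auto packed_desc = dil::convolution_forward::expected_weights_desc(
            weight.sizes().vec(),
            dil_input.get_data_type(),
            stride.vec(),
            padding.vec(),
            padding.vec(),
            dilation.vec(),
            groups,
            dil::algorithm::convolution_direct,
            dil::prop_kind::forward,
            dil_input.get_data_type(),
            input.sizes().vec());
        dbl::comm::reorder_to_desc(weight, packed_desc);
      }
    }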

torch_ipex/csrc/cpu/ExtendOPs.cpp

Lines changed: 7 additions & 7 deletions

@@ -461,38 +461,38 @@ at::Tensor AtenIpexTypeExt::linear(const at::Tensor& input, const at::Tensor& weight, const c10::optional<at::Tensor>& bias) {
 
 at::Tensor AtenIpexTypeExt::linear_fuse_relu(const at::Tensor& input, const at::Tensor& weight, const c10::optional<at::Tensor>& bias) {
   RECORD_FUNCTION("linear_fuse_relu", std::vector<c10::IValue>({input, weight, bias}), torch::autograd::Node::peek_at_next_sequence_nr());
-  return cpu::AtenIpexCPUDev::dil_linear_fuse_relu(input, weight, bias);
+  return cpu::AtenIpexCPUDev::dil_linear_fuse_relu(input.is_contiguous() ? input : input.contiguous(), weight.is_contiguous() ? weight : weight.contiguous(), bias);
 }
 
 std::tuple<at::Tensor, at::Tensor, at::Tensor> AtenIpexTypeExt::linear_backward(const at::Tensor& input, const at::Tensor& grad_output, const at::Tensor& weight, std::array<bool,3> output_mask) {
   RECORD_FUNCTION("linear_backward", std::vector<c10::IValue>({input, grad_output, weight}), torch::autograd::Node::peek_at_next_sequence_nr());
-  return cpu::AtenIpexCPUDev::dil_linear_backward(input.is_contiguous() ? input : input.contiguous(), grad_output, weight, output_mask);
+  return cpu::AtenIpexCPUDev::dil_linear_backward(input.is_contiguous() ? input : input.contiguous(), grad_output.is_contiguous() ? grad_output : grad_output.contiguous(), weight.is_contiguous() ? weight : weight.contiguous(), output_mask);
 }
 
 at::Tensor AtenIpexTypeExt::adaptive_avg_pool2d(at::Tensor const& input, at::IntArrayRef output_size) {
   return NewApaptiveAvgPoolingOp::apply(input, output_size);
 }
 
 at::Tensor AtenIpexTypeExt::adaptive_avg_pool2d_backward(const at::Tensor& grad_output, const at::Tensor& input) {
-  return cpu::AtenIpexCPUDev::dil_adaptive_avg_pool2d_backward(grad_output, input);
+  return cpu::AtenIpexCPUDev::dil_adaptive_avg_pool2d_backward(grad_output.is_contiguous() ? grad_output : grad_output.contiguous(), input.is_contiguous() ? input : input.contiguous());
 }
 
 at::Tensor AtenIpexTypeExt::max_pooling(const at::Tensor& input, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode) {
-  return NewMaxPoolingOp::apply(input, kernel_size, stride, padding, dilation, ceil_mode);
+  return NewMaxPoolingOp::apply(input.is_contiguous() ? input : input.contiguous(), kernel_size, stride, padding, dilation, ceil_mode);
 }
 
 at::Tensor AtenIpexTypeExt::max_pooling_backward(const at::Tensor& grad_output, const at::Tensor& output, const at::Tensor& input, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode) {
-  return cpu::AtenIpexCPUDev::dil_max_pooling_backward(grad_output, output, input, kernel_size, stride, padding, dilation, ceil_mode);
+  return cpu::AtenIpexCPUDev::dil_max_pooling_backward(grad_output.is_contiguous() ? grad_output : grad_output.contiguous(), output.is_contiguous() ? output : output.contiguous(), input.is_contiguous() ? input : input.contiguous(), kernel_size, stride, padding, dilation, ceil_mode);
 }
 
 at::Tensor AtenIpexTypeExt::reshape(const at::Tensor& input, at::IntArrayRef size) {
-  return cpu::AtenIpexCPUDev::dil_reshape(input, size);
+  return cpu::AtenIpexCPUDev::dil_reshape(input.is_contiguous() ? input : input.contiguous(), size);
 }
 
 
 at::Tensor AtenIpexTypeExt::relu_use_dst_for_bwd(const at::Tensor& grad_output, const at::Tensor& output) {
   RECORD_FUNCTION("dil_relu_use_dst_for_bwd", std::vector<c10::IValue>({grad_output, output}), torch::autograd::Node::peek_at_next_sequence_nr());
-  return cpu::AtenIpexCPUDev::dil_relu_use_dst_for_bwd(grad_output, output);
+  return cpu::AtenIpexCPUDev::dil_relu_use_dst_for_bwd(grad_output.is_contiguous() ? grad_output : grad_output.contiguous(), output.is_contiguous() ? output : output.contiguous());
 }
 
 } // namespace torch_ipex
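For a concrete sense of when the guards in this file fire: any strided view reaching the extension boundary. A self-contained libtorch example of such an input (illustrative only; no IPEX required):

    #include <torch/torch.h>
    #include <cstdio>

    int main() {
      torch::Tensor img = torch::randn({1, 8, 32, 32});
      // A step-2 channel slice is a view with non-unit strides: exactly the
      // kind of tensor the is_contiguous() guards above densify before any
      // dil_* kernel sees it.
      torch::Tensor strided = img.slice(/*dim=*/1, /*start=*/0, /*end=*/8, /*step=*/2);
      torch::Tensor dense = strided.is_contiguous() ? strided : strided.contiguous();
      std::printf("before: %d, after: %d\n",
                  (int)strided.is_contiguous(), (int)dense.is_contiguous());  // before: 0, after: 1
      return 0;
    }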
