@@ -40,9 +40,9 @@ namespace cpu {
   TORCH_INTERNAL_ASSERT_DEBUG_ONLY(tensor.defined()); \
   TORCH_INTERNAL_ASSERT_DEBUG_ONLY(tensor.layout() == c10::kStrided)
 
-#define CHECK_ATEN_BF16_USABLE(tensor) \
-  ShadeDataContext::isDilTensor(tensor) && \
-  ShadeDataContext::isTensorMixPrecision(tensor) && \
+#define CHECK_ATEN_BF16_USABLE(tensor) \
+  ShadeDataContext::isDilTensor(tensor) && \
+  ShadeDataContext::isTensorMixPrecision(tensor) && \
   ShadeDataContext::getDilStorage(tensor).get_data_type() == dil::data_type::bf16 && \
   dbl::comm::try_gen_dil_tensor(tensor).is_public_format()
 
@@ -2236,6 +2236,7 @@ at::Tensor AtenIpexCPUDev::dil_transpose(const at::Tensor & self, int64_t dim0,
 at::Tensor AtenIpexCPUDev::dil_slice(const at::Tensor & self, int64_t dim, int64_t start, int64_t end, int64_t step) {
   DEBUG("AtenIpexCPUDev::dil_slice\n");
   CHECK_DNNL_OP_PRE_COND(self);
+  dbl::comm::reorder_to_public(self, /*remain_dtype=*/true);
 
   // TODO use weight TAG to decide whether to reorder or not
   dbl::comm::reorder_to_bf16_for_mix_prec(self, true);
@@ -2578,7 +2579,7 @@ at::Tensor AtenIpexCPUDev::dil_index(const at::Tensor & self, at::TensorList ind
 at::Tensor AtenIpexCPUDev::dil_shuffle(const at::Tensor & self, at::IntArrayRef view_shape, int64_t dim0, int64_t dim1) {
   DEBUG("AtenIpexCPUDev::dil_shuffle\n");
 #if defined(IPEX_PROFILE_OP)
-  RECORD_FUNCTION("AtenIpexCPUDev::dil_shuffle", std::vector<c10::IValue>({self}));
+  RECORD_FUNCTION("AtenIpexCPUDev::dil_shuffle", std::vector<c10::IValue>({}));
 #endif
   // NOTE: We do NOT add sanity checks here because PyTorch does not have a shuffle operator. This dil operator is for fusion, and the fusion logic
   // has more sanity checks. We found that some models use view + transpose + view to implement shuffle semantics. So IPEX will fuse these
@@ -2594,7 +2595,7 @@ at::Tensor AtenIpexCPUDev::dil_shuffle(const at::Tensor & self, at::IntArrayRef
 std::tuple<at::Tensor,at::Tensor> AtenIpexCPUDev::dil__pack_padded_sequence(const at::Tensor & input, const at::Tensor & lengths, bool batch_first) {
   DEBUG("AtenIpexCPUDev::dil__pack_padded_sequence\n");
 #if defined(IPEX_PROFILE_OP)
-  RECORD_FUNCTION("AtenIpexCPUDev::dil__pack_padded_sequence", std::vector<c10::IValue>({input, lengths}));
+  RECORD_FUNCTION("AtenIpexCPUDev::dil__pack_padded_sequence", std::vector<c10::IValue>({}));
 #endif
   torch_ipex::reset_ipex_func_status();
 
@@ -2638,7 +2639,7 @@ at::Tensor& AtenIpexCPUDev::dil_copy_(
 
 std::vector<at::Tensor> AtenIpexCPUDev::dil_rnn_layer(const at::Tensor& input, const at::Tensor& w1, const at::Tensor& w2,
     const at::Tensor& w3, const at::Tensor& w4, const at::Tensor& hx, const at::Tensor& cx, bool reverse, int64_t mode,
-    int64_t hidden_size, int64_t num_layers, bool has_biases, bool train, bool bidirectional, at::IntArrayRef batch_sizes,
+    int64_t hidden_size, int64_t num_layers, bool has_biases, bool train, bool bidirectional, at::IntArrayRef batch_sizes,
     const std::vector<float>& scales, const std::vector<int32_t>& shift, bool quantized) {
   DEBUG("AtenIpexCPUDev::dil_rnn_layer\n");
 