4141using dpctl::tensor::kernels::alignment_utils::is_aligned;
4242using dpctl::tensor::kernels::alignment_utils::required_alignment;
4343
44- using sycl::ext::oneapi::experimental::group_load;
45- using sycl::ext::oneapi::experimental::group_store;
44+ namespace syclex = sycl::ext::oneapi::experimental;
45+ using syclex::group_load;
46+ using syclex::group_store;
47+
48+ constexpr auto striped = syclex::properties{syclex::data_placement_striped};
4649
4750template <typename T>
4851constexpr T dispatch_erf_op (T elem)
@@ -529,8 +532,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
529532 sycl::vec<_DataType_input1, vec_sz> x1{}; \
530533 sycl::vec<_DataType_input2, vec_sz> x2{}; \
531534 \
532- group_load (sg, input1_multi_ptr, x1); \
533- group_load (sg, input2_multi_ptr, x2); \
535+ group_load (sg, input1_multi_ptr, x1, striped); \
536+ group_load (sg, input2_multi_ptr, x2, striped); \
534537 \
535538 res_vec = __vec_operation__; \
536539 } \
@@ -540,8 +543,10 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
540543 sycl::vec<_DataType_input1, vec_sz> tmp_x1{}; \
541544 sycl::vec<_DataType_input2, vec_sz> tmp_x2{}; \
542545 \
543- group_load (sg, input1_multi_ptr, tmp_x1); \
544- group_load (sg, input2_multi_ptr, tmp_x2); \
546+ group_load (sg, input1_multi_ptr, tmp_x1, \
547+ striped); \
548+ group_load (sg, input2_multi_ptr, tmp_x2, \
549+ striped); \
545550 \
546551 sycl::vec<_DataType_output, vec_sz> x1 = \
547552 dpnp_vec_cast<_DataType_output, \
@@ -559,16 +564,16 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
559564 sycl::vec<_DataType_input1, vec_sz> x1{}; \
560565 sycl::vec<_DataType_input2, vec_sz> x2{}; \
561566 \
562- group_load (sg, input1_multi_ptr, x1); \
563- group_load (sg, input2_multi_ptr, x2); \
567+ group_load (sg, input1_multi_ptr, x1, striped); \
568+ group_load (sg, input2_multi_ptr, x2, striped); \
564569 \
565570 for (size_t k = 0 ; k < vec_sz; ++k) { \
566571 const _DataType_output input1_elem = x1[k]; \
567572 const _DataType_output input2_elem = x2[k]; \
568573 res_vec[k] = __operation__; \
569574 } \
570575 } \
571- group_store (sg, res_vec, result_multi_ptr); \
576+ group_store (sg, res_vec, result_multi_ptr, striped); \
572577 } \
573578 else { \
574579 for (size_t k = start + sg.get_local_id ()[0 ]; \
0 commit comments