@@ -599,21 +599,22 @@ usm_ndarray_take(dpctl::tensor::usm_ndarray src,
599
599
std::shared_ptr<shT> host_ind_offsets_shp =
600
600
std::make_shared<shT>(k, ind_allocator);
601
601
602
+ std::copy (ind_sh_sts.begin (), ind_sh_sts.end (),
603
+ host_ind_shapes_strides_shp->begin ());
604
+ std::copy (ind_ptrs.begin (), ind_ptrs.end (), host_ind_ptrs_shp->begin ());
605
+ std::copy (ind_offsets.begin (), ind_offsets.end (),
606
+ host_ind_offsets_shp->begin ());
607
+
602
608
std::vector<sycl::event> host_task_events;
603
609
host_task_events.reserve (5 );
604
610
605
- std::copy (ind_sh_sts.begin (), ind_sh_sts.end (),
606
- host_ind_shapes_strides_shp->begin ());
607
611
sycl::event packed_ind_ptrs_copy_ev = exec_q.copy <char *>(
608
612
host_ind_ptrs_shp->data (), packed_ind_ptrs, host_ind_ptrs_shp->size ());
609
613
610
- std::copy (ind_ptrs.begin (), ind_ptrs.end (), host_ind_ptrs_shp->begin ());
611
614
sycl::event packed_ind_shapes_strides_copy_ev = exec_q.copy <py::ssize_t >(
612
615
host_ind_shapes_strides_shp->data (), packed_ind_shapes_strides,
613
616
host_ind_shapes_strides_shp->size ());
614
617
615
- std::copy (ind_offsets.begin (), ind_offsets.end (),
616
- host_ind_offsets_shp->begin ());
617
618
sycl::event packed_ind_offsets_copy_ev = exec_q.copy <py::ssize_t >(
618
619
host_ind_offsets_shp->data (), packed_ind_offsets,
619
620
host_ind_offsets_shp->size ());
@@ -1010,38 +1011,39 @@ usm_ndarray_put(dpctl::tensor::usm_ndarray dst,
1010
1011
std::shared_ptr<shT> host_ind_offsets_shp =
1011
1012
std::make_shared<shT>(k, ind_allocator);
1012
1013
1014
+ std::copy (ind_sh_sts.begin (), ind_sh_sts.end (),
1015
+ host_ind_shapes_strides_shp->begin ());
1016
+ std::copy (ind_ptrs.begin (), ind_ptrs.end (), host_ind_ptrs_shp->begin ());
1017
+ std::copy (ind_offsets.begin (), ind_offsets.end (),
1018
+ host_ind_offsets_shp->begin ());
1019
+
1013
1020
std::vector<sycl::event> host_task_events;
1014
1021
host_task_events.reserve (5 );
1015
1022
1016
- std::copy (ind_ptrs.begin (), ind_ptrs.end (), host_ind_ptrs_shp->begin ());
1017
- sycl::event device_ind_ptrs_copy_ev = exec_q.copy <char *>(
1023
+ sycl::event packed_ind_ptrs_copy_ev = exec_q.copy <char *>(
1018
1024
host_ind_ptrs_shp->data (), packed_ind_ptrs, host_ind_ptrs_shp->size ());
1019
1025
1020
- std::copy (ind_sh_sts.begin (), ind_sh_sts.end (),
1021
- host_ind_shapes_strides_shp->begin ());
1022
- sycl::event device_ind_shapes_strides_copy_ev = exec_q.copy <py::ssize_t >(
1026
+ sycl::event packed_ind_shapes_strides_copy_ev = exec_q.copy <py::ssize_t >(
1023
1027
host_ind_shapes_strides_shp->data (), packed_ind_shapes_strides,
1024
1028
host_ind_shapes_strides_shp->size ());
1025
1029
1026
- std::copy (ind_offsets.begin (), ind_offsets.end (),
1027
- host_ind_offsets_shp->begin ());
1028
- sycl::event device_ind_offsets_copy_ev = exec_q.copy <py::ssize_t >(
1030
+ sycl::event packed_ind_offsets_copy_ev = exec_q.copy <py::ssize_t >(
1029
1031
host_ind_offsets_shp->data (), packed_ind_offsets,
1030
1032
host_ind_offsets_shp->size ());
1031
1033
1032
1034
sycl::event shared_ptr_cleanup_host_task =
1033
1035
exec_q.submit ([&](sycl::handler &cgh) {
1034
- cgh.depends_on (device_ind_ptrs_copy_ev);
1035
- cgh. depends_on (device_ind_shapes_strides_copy_ev);
1036
- cgh. depends_on (device_ind_offsets_copy_ev );
1037
- cgh.host_task ([host_ind_ptrs_shp , host_ind_shapes_strides_shp,
1038
- host_ind_offsets_shp ]() {});
1036
+ cgh.depends_on ({packed_ind_offsets_copy_ev,
1037
+ packed_ind_shapes_strides_copy_ev,
1038
+ packed_ind_ptrs_copy_ev} );
1039
+ cgh.host_task ([host_ind_offsets_shp , host_ind_shapes_strides_shp,
1040
+ host_ind_ptrs_shp ]() {});
1039
1041
});
1040
1042
host_task_events.push_back (shared_ptr_cleanup_host_task);
1041
1043
1042
- std::vector<sycl::event> ind_pack_depends{device_ind_ptrs_copy_ev ,
1043
- device_ind_shapes_strides_copy_ev ,
1044
- device_ind_offsets_copy_ev };
1044
+ std::vector<sycl::event> ind_pack_depends{packed_ind_ptrs_copy_ev ,
1045
+ packed_ind_shapes_strides_copy_ev ,
1046
+ packed_ind_offsets_copy_ev };
1045
1047
1046
1048
bool is_dst_c_contig = dst.is_c_contiguous ();
1047
1049
bool is_dst_f_contig = dst.is_f_contiguous ();
0 commit comments