@@ -599,22 +599,21 @@ usm_ndarray_take(dpctl::tensor::usm_ndarray src,
599
599
std::shared_ptr<shT> host_ind_offsets_shp =
600
600
std::make_shared<shT>(k, ind_allocator);
601
601
602
- std::copy (ind_sh_sts.begin (), ind_sh_sts.end (),
603
- host_ind_shapes_strides_shp->begin ());
604
- std::copy (ind_ptrs.begin (), ind_ptrs.end (), host_ind_ptrs_shp->begin ());
605
- std::copy (ind_offsets.begin (), ind_offsets.end (),
606
- host_ind_offsets_shp->begin ());
607
-
608
602
std::vector<sycl::event> host_task_events;
609
603
host_task_events.reserve (5 );
610
604
605
+ std::copy (ind_sh_sts.begin (), ind_sh_sts.end (),
606
+ host_ind_shapes_strides_shp->begin ());
611
607
sycl::event packed_ind_ptrs_copy_ev = exec_q.copy <char *>(
612
608
host_ind_ptrs_shp->data (), packed_ind_ptrs, host_ind_ptrs_shp->size ());
613
609
610
+ std::copy (ind_ptrs.begin (), ind_ptrs.end (), host_ind_ptrs_shp->begin ());
614
611
sycl::event packed_ind_shapes_strides_copy_ev = exec_q.copy <py::ssize_t >(
615
612
host_ind_shapes_strides_shp->data (), packed_ind_shapes_strides,
616
613
host_ind_shapes_strides_shp->size ());
617
614
615
+ std::copy (ind_offsets.begin (), ind_offsets.end (),
616
+ host_ind_offsets_shp->begin ());
618
617
sycl::event packed_ind_offsets_copy_ev = exec_q.copy <py::ssize_t >(
619
618
host_ind_offsets_shp->data (), packed_ind_offsets,
620
619
host_ind_offsets_shp->size ());
@@ -1011,40 +1010,34 @@ usm_ndarray_put(dpctl::tensor::usm_ndarray dst,
1011
1010
std::shared_ptr<shT> host_ind_offsets_shp =
1012
1011
std::make_shared<shT>(k, ind_allocator);
1013
1012
1014
- std::copy (ind_sh_sts.begin (), ind_sh_sts.end (),
1015
- host_ind_shapes_strides_shp->begin ());
1016
- std::copy (ind_ptrs.begin (), ind_ptrs.end (), host_ind_ptrs_shp->begin ());
1017
- std::copy (ind_offsets.begin (), ind_offsets.end (),
1018
- host_ind_offsets_shp->begin ());
1019
-
1020
1013
std::vector<sycl::event> host_task_events;
1021
- host_task_events.reserve (7 );
1014
+ host_task_events.reserve (5 );
1022
1015
1016
+ std::copy (ind_ptrs.begin (), ind_ptrs.end (), host_ind_ptrs_shp->begin ());
1023
1017
sycl::event device_ind_ptrs_copy_ev = exec_q.copy <char *>(
1024
1018
host_ind_ptrs_shp->data (), packed_ind_ptrs, host_ind_ptrs_shp->size ());
1025
- sycl::event ind_ptrs_host_task = exec_q.submit ([&](sycl::handler &cgh) {
1026
- cgh.depends_on (device_ind_ptrs_copy_ev);
1027
- cgh.host_task ([host_ind_ptrs_shp]() {});
1028
- });
1029
- host_task_events.push_back (ind_ptrs_host_task);
1030
1019
1020
+ std::copy (ind_sh_sts.begin (), ind_sh_sts.end (),
1021
+ host_ind_shapes_strides_shp->begin ());
1031
1022
sycl::event device_ind_shapes_strides_copy_ev = exec_q.copy <py::ssize_t >(
1032
1023
host_ind_shapes_strides_shp->data (), packed_ind_shapes_strides,
1033
1024
host_ind_shapes_strides_shp->size ());
1034
- sycl::event ind_sh_st_host_task = exec_q.submit ([&](sycl::handler &cgh) {
1035
- cgh.depends_on (device_ind_shapes_strides_copy_ev);
1036
- cgh.host_task ([host_ind_shapes_strides_shp]() {});
1037
- });
1038
- host_task_events.push_back (ind_sh_st_host_task);
1039
1025
1026
+ std::copy (ind_offsets.begin (), ind_offsets.end (),
1027
+ host_ind_offsets_shp->begin ());
1040
1028
sycl::event device_ind_offsets_copy_ev = exec_q.copy <py::ssize_t >(
1041
1029
host_ind_offsets_shp->data (), packed_ind_offsets,
1042
1030
host_ind_offsets_shp->size ());
1043
- sycl::event ind_offsets_host_task = exec_q.submit ([&](sycl::handler &cgh) {
1044
- cgh.depends_on (device_ind_offsets_copy_ev);
1045
- cgh.host_task ([host_ind_offsets_shp]() {});
1046
- });
1047
- host_task_events.push_back (ind_offsets_host_task);
1031
+
1032
+ sycl::event shared_ptr_cleanup_host_task =
1033
+ exec_q.submit ([&](sycl::handler &cgh) {
1034
+ cgh.depends_on (device_ind_ptrs_copy_ev);
1035
+ cgh.depends_on (device_ind_shapes_strides_copy_ev);
1036
+ cgh.depends_on (device_ind_offsets_copy_ev);
1037
+ cgh.host_task ([host_ind_ptrs_shp, host_ind_shapes_strides_shp,
1038
+ host_ind_offsets_shp]() {});
1039
+ });
1040
+ host_task_events.push_back (shared_ptr_cleanup_host_task);
1048
1041
1049
1042
std::vector<sycl::event> ind_pack_depends{device_ind_ptrs_copy_ev,
1050
1043
device_ind_shapes_strides_copy_ev,
0 commit comments