@@ -1331,11 +1331,6 @@ void copy_numpy_ndarray_into_usm_ndarray(
1331
1331
// Create shared pointers with shape and src/dst strides, copy into device
1332
1332
// memory
1333
1333
using shT = std::vector<py::ssize_t >;
1334
- std::shared_ptr<shT> shp_shape = std::make_shared<shT>(simplified_shape);
1335
- std::shared_ptr<shT> shp_src_strides =
1336
- std::make_shared<shT>(simplified_src_strides);
1337
- std::shared_ptr<shT> shp_dst_strides =
1338
- std::make_shared<shT>(simplified_dst_strides);
1339
1334
1340
1335
// Get implementation function pointer
1341
1336
auto copy_and_cast_from_host_blocking_fn =
@@ -1351,42 +1346,22 @@ void copy_numpy_ndarray_into_usm_ndarray(
1351
1346
throw std::runtime_error (" Unabled to allocate device memory" );
1352
1347
}
1353
1348
1354
- sycl::event copy_shape_ev =
1355
- exec_q.copy <py::ssize_t >(shp_shape->data (), shape_strides, nd);
1356
-
1357
- exec_q.submit ([&](sycl::handler &cgh) {
1358
- cgh.depends_on (copy_shape_ev);
1359
- cgh.host_task ([shp_shape]() {
1360
- // increment shared pointer ref-count to keep it alive
1361
- // till copy operation completes;
1362
- });
1363
- });
1364
-
1365
- sycl::event copy_src_strides_ev = exec_q.copy <py::ssize_t >(
1366
- shp_src_strides->data (), shape_strides + nd, nd);
1367
- exec_q.submit ([&](sycl::handler &cgh) {
1368
- cgh.depends_on (copy_src_strides_ev);
1369
- cgh.host_task ([shp_src_strides]() {
1370
- // increment shared pointer ref-count to keep it alive
1371
- // till copy operation completes;
1372
- });
1373
- });
1349
+ std::shared_ptr<shT> host_shape_strides_shp = std::make_shared<shT>(3 * nd);
1350
+ std::copy (simplified_shape.begin (), simplified_shape.end (),
1351
+ host_shape_strides_shp->begin ());
1352
+ std::copy (simplified_src_strides.begin (), simplified_src_strides.end (),
1353
+ host_shape_strides_shp->begin () + nd);
1354
+ std::copy (simplified_dst_strides.begin (), simplified_dst_strides.end (),
1355
+ host_shape_strides_shp->begin () + 2 * nd);
1374
1356
1375
- sycl::event copy_dst_strides_ev = exec_q.copy <py::ssize_t >(
1376
- shp_dst_strides->data (), shape_strides + 2 * nd, nd);
1377
- exec_q.submit ([&](sycl::handler &cgh) {
1378
- cgh.depends_on (copy_dst_strides_ev);
1379
- cgh.host_task ([shp_dst_strides]() {
1380
- // increment shared pointer ref-count to keep it alive
1381
- // till copy operation completes;
1382
- });
1383
- });
1357
+ sycl::event copy_packed_ev =
1358
+ exec_q.copy <py::ssize_t >(host_shape_strides_shp->data (), shape_strides,
1359
+ host_shape_strides_shp->size ());
1384
1360
1385
1361
copy_and_cast_from_host_blocking_fn (
1386
1362
exec_q, src_nelems, nd, shape_strides, src_data, src_offset,
1387
1363
npy_src_min_nelem_offset, npy_src_max_nelem_offset, dst_data,
1388
- dst_offset, depends,
1389
- {copy_shape_ev, copy_src_strides_ev, copy_dst_strides_ev});
1364
+ dst_offset, depends, {copy_packed_ev});
1390
1365
1391
1366
sycl::free (shape_strides, exec_q);
1392
1367
0 commit comments