Skip to content

Commit 7203dea

Browse files
committed
Add dummy template params to memory APIs to delay kernel instantiation
`syclcompat::detail::fill` and `syclcompat::detail::memcpy` define SYCL kernels. Any translation unit including the syclcompat/memory.hpp or util.hpp will contain these kernels, as DPC++'s 2-pass compiler is not currently able to reason about which kernels are actually used. Adding a dummy template parameter (typename T = void) ensures that these functions (and thus their kernels) are only instantiated if used.
1 parent be88160 commit 7203dea

File tree

2 files changed

+62
-2
lines changed

2 files changed

+62
-2
lines changed

sycl/include/syclcompat/memory.hpp

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -603,12 +603,13 @@ class host_buffer {
603603

604604
/// copy 3D matrix specified by \p size from 3D matrix specified by \p from_ptr
605605
/// and \p from_range to another specified by \p to_ptr and \p to_range.
606+
template <typename T = void>
606607
static inline std::vector<sycl::event>
607608
memcpy(sycl::queue q, void *to_ptr, const void *from_ptr,
608609
sycl::range<3> to_range, sycl::range<3> from_range, sycl::id<3> to_id,
609610
sycl::id<3> from_id, sycl::range<3> size,
610611
const std::vector<sycl::event> &dep_events = {}) {
611-
612+
static_assert(std::is_same_v<T, void>, "This syclcompat::detail::memcpy overload only accepts a dummy template parameter.");
612613
std::vector<sycl::event> event_list;
613614

614615
size_t to_slice = to_range.get(1) * to_range.get(0);
@@ -727,19 +728,23 @@ memcpy(sycl::queue q, void *to_ptr, const void *from_ptr,
727728
}
728729

729730
/// memcpy 2D/3D matrix specified by pitched_data.
731+
template <typename T = void>
730732
static inline std::vector<sycl::event>
731733
memcpy(sycl::queue q, pitched_data to, sycl::id<3> to_id, pitched_data from,
732734
sycl::id<3> from_id, sycl::range<3> size) {
735+
static_assert(std::is_same_v<T, void>, "This syclcompat::detail::memcpy overload only accepts a dummy template parameter.");
733736
return memcpy(q, to.get_data_ptr(), from.get_data_ptr(),
734737
sycl::range<3>(to.get_pitch(), to.get_y(), 1),
735738
sycl::range<3>(from.get_pitch(), from.get_y(), 1), to_id,
736739
from_id, size);
737740
}
738741

739742
/// memcpy 2D matrix with pitch.
743+
template <typename T = void>
740744
static inline std::vector<sycl::event>
741745
memcpy(sycl::queue q, void *to_ptr, const void *from_ptr, size_t to_pitch,
742746
size_t from_pitch, size_t x, size_t y) {
747+
static_assert(std::is_same_v<T, void>, "This syclcompat::detail::memcpy overload only accepts a dummy template parameter.");
743748
return memcpy(q, to_ptr, from_ptr, sycl::range<3>(to_pitch, y, 1),
744749
sycl::range<3>(from_pitch, y, 1), sycl::id<3>(0, 0, 0),
745750
sycl::id<3>(0, 0, 0), sycl::range<3>(x, y, 1));
@@ -856,8 +861,10 @@ static sycl::accessor<byte_t, 1, accessMode> get_access(const void *ptr,
856861

857862
namespace experimental {
858863
namespace detail {
864+
template <typename T = void>
859865
static inline std::vector<sycl::event>
860866
memcpy(sycl::queue q, const experimental::memcpy_parameter &param) {
867+
static_assert(std::is_same_v<T, void>, "This syclcompat::experimental::detail::memcpy overload only accepts a dummy template parameter.");
861868
auto to = param.to.pitched;
862869
auto from = param.from.pitched;
863870
#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
@@ -1123,6 +1130,7 @@ static void memcpy(type_identity_t<T> *to_ptr,
11231130
/// specified by \p from_ptr and \p to_ptr. The function will return after the
11241131
/// copy is completed.
11251132
///
1133+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
11261134
/// \param to_ptr Pointer to destination memory address.
11271135
/// \param to_pitch Range of dim x in bytes of destination matrix.
11281136
/// \param from_ptr Pointer to source memory address.
@@ -1131,9 +1139,11 @@ static void memcpy(type_identity_t<T> *to_ptr,
11311139
/// \param y Range of dim y of matrix to be copied.
11321140
/// \param q Queue to execute the copy task.
11331141
/// \returns no return value.
1142+
template <typename T = void>
11341143
static inline void memcpy(void *to_ptr, size_t to_pitch, const void *from_ptr,
11351144
size_t from_pitch, size_t x, size_t y,
11361145
sycl::queue q = get_default_queue()) {
1146+
static_assert(std::is_same_v<T, void>, "This syclcompat::memcpy overload only accepts a dummy template parameter.");
11371147
sycl::event::wait(
11381148
detail::memcpy(q, to_ptr, from_ptr, to_pitch, from_pitch, x, y));
11391149
}
@@ -1146,6 +1156,7 @@ static inline void memcpy(void *to_ptr, size_t to_pitch, const void *from_ptr,
11461156
/// specified by \p from_ptr and \p to_ptr. The return of the function does NOT
11471157
/// guarantee the copy is completed.
11481158
///
1159+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
11491160
/// \param to_ptr Pointer to destination memory address.
11501161
/// \param to_pitch Range of dim x in bytes of destination matrix.
11511162
/// \param from_ptr Pointer to source memory address.
@@ -1154,10 +1165,12 @@ static inline void memcpy(void *to_ptr, size_t to_pitch, const void *from_ptr,
11541165
/// \param y Range of dim y of matrix to be copied.
11551166
/// \param q Queue to execute the copy task.
11561167
/// \returns An event representing the memcpy operation.
1168+
template <typename T = void>
11571169
static inline sycl::event memcpy_async(void *to_ptr, size_t to_pitch,
11581170
const void *from_ptr, size_t from_pitch,
11591171
size_t x, size_t y,
11601172
sycl::queue q = get_default_queue()) {
1173+
static_assert(std::is_same_v<T, void>, "This syclcompat::memcpy overload only accepts a dummy template parameter.");
11611174
auto events = detail::memcpy(q, to_ptr, from_ptr, to_pitch, from_pitch, x, y);
11621175
return detail::combine_events(events, q);
11631176
}
@@ -1168,17 +1181,20 @@ namespace {
11681181
/// by \p from_pos and \p to_pos The copied matrix size is specified by \p size.
11691182
// The function will return after the copy is completed.
11701183
///
1184+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
11711185
/// \param to Destination matrix info.
11721186
/// \param to_pos Position of destination.
11731187
/// \param from Source matrix info.
11741188
/// \param from_pos Position of destination.
11751189
/// \param size Range of the submatrix to be copied.
11761190
/// \param q Queue to execute the copy task.
11771191
/// \returns no return value.
1192+
template <typename T = void>
11781193
static inline void memcpy(pitched_data to, sycl::id<3> to_pos,
11791194
pitched_data from, sycl::id<3> from_pos,
11801195
sycl::range<3> size,
11811196
sycl::queue q = get_default_queue()) {
1197+
static_assert(std::is_same_v<T, void>, "This syclcompat::memcpy overload only accepts a dummy template parameter.");
11821198
sycl::event::wait(detail::memcpy(q, to, to_pos, from, from_pos, size));
11831199
}
11841200
} // namespace
@@ -1195,10 +1211,12 @@ static inline void memcpy(pitched_data to, sycl::id<3> to_pos,
11951211
/// \param size Range of the submatrix to be copied.
11961212
/// \param q Queue to execute the copy task.
11971213
/// \returns An event representing the memcpy operation.
1214+
template <typename T = void>
11981215
static inline sycl::event memcpy_async(pitched_data to, sycl::id<3> to_pos,
11991216
pitched_data from, sycl::id<3> from_pos,
12001217
sycl::range<3> size,
12011218
sycl::queue q = get_default_queue()) {
1219+
static_assert(std::is_same_v<T, void>, "This syclcompat::memcpy overload only accepts a dummy template parameter.");
12021220
auto events = detail::memcpy(q, to, to_pos, from, from_pos, size);
12031221
return detail::combine_events(events, q);
12041222
}
@@ -1243,11 +1261,14 @@ namespace experimental {
12431261
/// [UNSUPPORTED] Synchronously copies 2D/3D memory data specified by \p param .
12441262
/// The function will return after the copy is completed.
12451263
///
1264+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
12461265
/// \param param Memory copy parameters.
12471266
/// \param q Queue to execute the copy task.
12481267
/// \returns no return value.
1268+
template <typename T = void>
12491269
static inline void memcpy(const memcpy_parameter &param,
12501270
sycl::queue q = get_default_queue()) {
1271+
static_assert(std::is_same_v<T, void>, "This syclcompat::memcpy overload only accepts a dummy template parameter.");
12511272
sycl::event::wait(syclcompat::experimental::detail::memcpy(q, param));
12521273
}
12531274

@@ -1257,8 +1278,10 @@ static inline void memcpy(const memcpy_parameter &param,
12571278
/// \param param Memory copy parameters.
12581279
/// \param q Queue to execute the copy task.
12591280
/// \returns no return value.
1281+
template <typename T = void>
12601282
static inline void memcpy_async(const memcpy_parameter &param,
12611283
sycl::queue q = get_default_queue()) {
1284+
static_assert(std::is_same_v<T, void>, "This syclcompat::memcpy overload only accepts a dummy template parameter.");
12621285
syclcompat::experimental::detail::memcpy(q, param);
12631286
}
12641287
} // namespace experimental
@@ -1280,23 +1303,29 @@ static void memset(void *dev_ptr, int value, size_t size,
12801303

12811304
/// \brief Sets 2 bytes data \p value to the first \p size elements starting
12821305
/// from \p dev_ptr in \p q synchronously.
1306+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
12831307
/// \param [in] dev_ptr Pointer to the virtual device memory address.
12841308
/// \param [in] value The value to be set.
12851309
/// \param [in] size Number of elements to be set to the value.
12861310
/// \param [in] q The queue in which the operation is done.
1311+
template <typename T = void>
12871312
static inline void memset_d16(void *dev_ptr, unsigned short value, size_t size,
12881313
sycl::queue q = get_default_queue()) {
1314+
static_assert(std::is_same_v<T, void>, "syclcompat::memset_d16 only accepts a dummy template parameter.");
12891315
detail::fill<unsigned short>(q, dev_ptr, value, size).wait();
12901316
}
12911317

12921318
/// \brief Sets 4 bytes data \p value to the first \p size elements starting
12931319
/// from \p dev_ptr in \p q synchronously.
1320+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
12941321
/// \param [in] dev_ptr Pointer to the virtual device memory address.
12951322
/// \param [in] value The value to be set.
12961323
/// \param [in] size Number of elements to be set to the value.
12971324
/// \param [in] q The queue in which the operation is done.
1325+
template <typename T = void>
12981326
static inline void memset_d32(void *dev_ptr, unsigned int value, size_t size,
12991327
sycl::queue q = get_default_queue()) {
1328+
static_assert(std::is_same_v<T, void>, "syclcompat::memset_d32 only accepts a dummy template parameter.");
13001329
detail::fill<unsigned int>(q, dev_ptr, value, size).wait();
13011330
}
13021331

@@ -1313,118 +1342,141 @@ static inline sycl::event memset_async(void *dev_ptr, int value, size_t size,
13131342

13141343
/// \brief Sets 2 bytes data \p value to the first \p size elements starting
13151344
/// from \p dev_ptr in \p q asynchronously.
1345+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
13161346
/// \param [in] dev_ptr Pointer to the virtual device memory address.
13171347
/// \param [in] value The value to be set.
13181348
/// \param [in] size Number of elements to be set to the value.
13191349
/// \param [in] q The queue in which the operation is done.
13201350
/// \returns An event representing the memset operation.
1351+
template <typename T = void>
13211352
static inline sycl::event
13221353
memset_d16_async(void *dev_ptr, unsigned short value, size_t size,
13231354
sycl::queue q = get_default_queue()) {
1355+
static_assert(std::is_same_v<T, void>, "syclcompat::memset_d16_async only accepts a dummy template parameter.");
13241356
return detail::fill<unsigned short>(q, dev_ptr, value, size);
13251357
}
13261358

13271359
/// \brief Sets 4 bytes data \p value to the first \p size elements starting
13281360
/// from \p dev_ptr in \p q asynchronously.
1361+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
13291362
/// \param [in] dev_ptr Pointer to the virtual device memory address.
13301363
/// \param [in] value The value to be set.
13311364
/// \param [in] size Number of elements to be set to the value.
13321365
/// \param [in] q The queue in which the operation is done.
13331366
/// \returns An event representing the memset operation.
1367+
template <typename T = void>
13341368
static inline sycl::event
13351369
memset_d32_async(void *dev_ptr, unsigned int value, size_t size,
13361370
sycl::queue q = get_default_queue()) {
1371+
static_assert(std::is_same_v<T, void>, "syclcompat::memset_d32_async only accepts a dummy template parameter.");
13371372
return detail::fill<unsigned int>(q, dev_ptr, value, size);
13381373
}
13391374

13401375
namespace {
13411376
/// \brief Sets 1 byte data \p val to the pitched 2D memory region pointed by \p
13421377
/// ptr in \p q synchronously.
1378+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
13431379
/// \param [in] ptr Pointer to the virtual device memory.
13441380
/// \param [in] pitch The pitch size by number of elements, including padding.
13451381
/// \param [in] val The value to be set.
13461382
/// \param [in] x The width of memory region by number of elements.
13471383
/// \param [in] y The height of memory region by number of elements.
13481384
/// \param [in] q The queue in which the operation is done.
1385+
template <typename T = void>
13491386
static inline void memset(void *ptr, size_t pitch, int val, size_t x, size_t y,
13501387
sycl::queue q = get_default_queue()) {
1388+
static_assert(std::is_same_v<T, void>, "This syclcompat::memset overload only accepts a dummy template parameter.");
13511389
sycl::event::wait(detail::memset<unsigned char>(q, ptr, pitch, val, x, y));
13521390
}
13531391
} // namespace
13541392

13551393
/// \brief Sets 2 bytes data \p val to the pitched 2D memory region pointed by
13561394
/// ptr in \p q synchronously.
1395+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
13571396
/// \param [in] ptr Pointer to the virtual device memory.
13581397
/// \param [in] pitch The pitch size by number of elements, including padding.
13591398
/// \param [in] val The value to be set.
13601399
/// \param [in] x The width of memory region by number of elements.
13611400
/// \param [in] y The height of memory region by number of elements.
13621401
/// \param [in] q The queue in which the operation is done.
1402+
template <typename T = void>
13631403
static inline void memset_d16(void *ptr, size_t pitch, unsigned short val,
13641404
size_t x, size_t y,
13651405
sycl::queue q = get_default_queue()) {
1406+
static_assert(std::is_same_v<T, void>, "syclcompat::memset_d16 only accepts a dummy template parameter.");
13661407
sycl::event::wait(detail::memset(q, ptr, pitch, val, x, y));
13671408
}
13681409

13691410
/// \brief Sets 4 bytes data \p val to the pitched 2D memory region pointed by
13701411
/// ptr in \p q synchronously.
1412+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
13711413
/// \param [in] ptr Pointer to the virtual device memory.
13721414
/// \param [in] pitch The pitch size by number of elements, including padding.
13731415
/// \param [in] val The value to be set.
13741416
/// \param [in] x The width of memory region by number of elements.
13751417
/// \param [in] y The height of memory region by number of elements.
13761418
/// \param [in] q The queue in which the operation is done.
1419+
template <typename T = void>
13771420
static inline void memset_d32(void *ptr, size_t pitch, unsigned int val,
13781421
size_t x, size_t y,
13791422
sycl::queue q = get_default_queue()) {
1423+
static_assert(std::is_same_v<T, void>, "syclcompat::memset_d32 only accepts a dummy template parameter.");
13801424
sycl::event::wait(detail::memset(q, ptr, pitch, val, x, y));
13811425
}
13821426

13831427
/// \brief Sets 1 byte data \p val to the pitched 2D memory region pointed by \p
13841428
/// ptr in \p q asynchronously.
1429+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
13851430
/// \param [in] ptr Pointer to the virtual device memory.
13861431
/// \param [in] pitch The pitch size by number of elements, including padding.
13871432
/// \param [in] val The value to be set.
13881433
/// \param [in] x The width of memory region by number of elements.
13891434
/// \param [in] y The height of memory region by number of elements.
13901435
/// \param [in] q The queue in which the operation is done.
13911436
/// \returns An event representing the memset operation.
1437+
template <typename T = void>
13921438
static inline sycl::event memset_async(void *ptr, size_t pitch, int val,
13931439
size_t x, size_t y,
13941440
sycl::queue q = get_default_queue()) {
1395-
1441+
static_assert(std::is_same_v<T, void>, "syclcompat::memset_async only accepts a dummy template parameter.");
13961442
auto events = detail::memset<unsigned char>(q, ptr, pitch, val, x, y);
13971443
return detail::combine_events(events, q);
13981444
}
13991445

14001446
/// \brief Sets 2 bytes data \p val to the pitched 2D memory region pointed by
14011447
/// \p ptr in \p q asynchronously.
1448+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
14021449
/// \param [in] ptr Pointer to the virtual device memory.
14031450
/// \param [in] pitch The pitch size by number of elements, including padding.
14041451
/// \param [in] val The value to be set.
14051452
/// \param [in] x The width of memory region by number of elements.
14061453
/// \param [in] y The height of memory region by number of elements.
14071454
/// \param [in] q The queue in which the operation is done.
14081455
/// \returns An event representing the memset operation.
1456+
template <typename T = void>
14091457
static inline sycl::event
14101458
memset_d16_async(void *ptr, size_t pitch, unsigned short val, size_t x,
14111459
size_t y, sycl::queue q = get_default_queue()) {
1460+
static_assert(std::is_same_v<T, void>, "syclcompat::memset_d16_async only accepts a dummy template parameter.");
14121461
auto events = detail::memset(q, ptr, pitch, val, x, y);
14131462
return detail::combine_events(events, q);
14141463
}
14151464

14161465
/// \brief Sets 4 bytes data \p val to the pitched 2D memory region pointed by
14171466
/// \p ptr in \p q asynchronously.
1467+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
14181468
/// \param [in] ptr Pointer to the virtual device memory.
14191469
/// \param [in] pitch The pitch size by number of elements, including padding.
14201470
/// \param [in] val The value to be set.
14211471
/// \param [in] x The width of memory region by number of elements.
14221472
/// \param [in] y The height of memory region by number of elements.
14231473
/// \param [in] q The queue in which the operation is done.
14241474
/// \returns An event representing the memset operation.
1475+
template <typename T = void>
14251476
static inline sycl::event
14261477
memset_d32_async(void *ptr, size_t pitch, unsigned int val, size_t x, size_t y,
14271478
sycl::queue q = get_default_queue()) {
1479+
static_assert(std::is_same_v<T, void>, "syclcompat::memset_d32_async only accepts a dummy template parameter.");
14281480
auto events = detail::memset(q, ptr, pitch, val, x, y);
14291481
return detail::combine_events(events, q);
14301482
}
@@ -1434,13 +1486,16 @@ namespace {
14341486
/// specify the setted 3D memory size. The function will return after the
14351487
/// memset operation is completed.
14361488
///
1489+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
14371490
/// \param pitch Specify the 3D memory region.
14381491
/// \param value Value to be set.
14391492
/// \param size The setted 3D memory size.
14401493
/// \param q The queue in which the operation is done.
14411494
/// \returns no return value.
1495+
template <typename T = void>
14421496
static inline void memset(pitched_data pitch, int val, sycl::range<3> size,
14431497
sycl::queue q = get_default_queue()) {
1498+
static_assert(std::is_same_v<T, void>, "syclcompat::memset only accepts a dummy template parameter.");
14441499
sycl::event::wait(detail::memset<unsigned char>(q, pitch, val, size));
14451500
}
14461501
} // namespace
@@ -1449,14 +1504,17 @@ static inline void memset(pitched_data pitch, int val, sycl::range<3> size,
14491504
/// specify the setted 3D memory size. The return of the function does NOT
14501505
/// guarantee the memset operation is completed.
14511506
///
1507+
/// \tparam T Dummy template parameter to delay SYCL kernel instantiation
14521508
/// \param pitch Specify the 3D memory region.
14531509
/// \param value Value to be set.
14541510
/// \param size The setted 3D memory size.
14551511
/// \param q The queue in which the operation is done.
14561512
/// \returns An event representing the memset operation.
1513+
template <typename T = void>
14571514
static inline sycl::event memset_async(pitched_data pitch, int val,
14581515
sycl::range<3> size,
14591516
sycl::queue q = get_default_queue()) {
1517+
static_assert(std::is_same_v<T, void>, "syclcompat::memset_async only accepts a dummy template parameter.");
14601518
auto events = detail::memset<unsigned char>(q, pitch, val, size);
14611519
return detail::combine_events(events, q);
14621520
}

sycl/include/syclcompat/util.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,12 @@ template <typename T> struct DataType<sycl::vec<T, 2>> {
8686
using T2 = detail::complex_type<T>;
8787
};
8888

89+
template <typename T = void>
8990
inline void matrix_mem_copy(void *to_ptr, const void *from_ptr, int to_ld,
9091
int from_ld, int rows, int cols, int elem_size,
9192
sycl::queue queue = syclcompat::get_default_queue(),
9293
bool async = false) {
94+
static_assert(std::is_same_v<T, void>, "syclcompat::matrix_mem_copy only accepts a dummy template parameter.");
9395
if (to_ptr == from_ptr && to_ld == from_ld) {
9496
return;
9597
}

0 commit comments

Comments
 (0)