Skip to content

Commit c92b0c3

Browse files
[NFCI][SYCL] Inline parallel_for_work_group_lambda_impl helpers
Similar to what has been done for `single_task_lambda_impl` in intel#18020.
1 parent 97d56c1 commit c92b0c3

File tree

1 file changed

+21
-64
lines changed

1 file changed

+21
-64
lines changed

sycl/include/sycl/handler.hpp

Lines changed: 21 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1337,57 +1337,6 @@ class __SYCL_EXPORT handler {
13371337
#endif
13381338
}
13391339

1340-
/// Hierarchical kernel invocation method of a kernel defined as a lambda
1341-
/// encoding the body of each work-group to launch.
1342-
///
1343-
/// Lambda may contain multiple calls to parallel_for_work_item(...) methods
1344-
/// representing the execution on each work-item. Launches NumWorkGroups
1345-
/// work-groups of runtime-defined size.
1346-
///
1347-
/// \param NumWorkGroups is a range describing the number of work-groups in
1348-
/// each dimension.
/// \param Props is a set of kernel properties; it is forwarded unchanged to
/// wrap_kernel.
1349-
/// \param KernelFunc is a lambda representing the kernel.
1350-
template <
1351-
typename KernelName, typename KernelType, int Dims,
1352-
typename PropertiesT = ext::oneapi::experimental::empty_properties_t>
1353-
void parallel_for_work_group_lambda_impl(range<Dims> NumWorkGroups,
1354-
PropertiesT Props,
1355-
const KernelType &KernelFunc) {
// Type produced by lambda_arg_type for this lambda with group<Dims>;
// presumably the lambda's parameter type -- confirm against
// sycl::detail::lambda_arg_type.
1356-
using LambdaArgType =
1357-
sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
// Only the number of work-groups is known here, so the range is passed
// through as-is with SetNumWorkGroups=true; no kernel object is supplied.
1358-
wrap_kernel<WrapAs::parallel_for_work_group, KernelName, LambdaArgType,
1359-
Dims,
1360-
/*SetNumWorkGroups=*/true>(KernelFunc, nullptr /*Kernel*/,
1361-
Props, NumWorkGroups);
1362-
}
1363-
1364-
/// Hierarchical kernel invocation method of a kernel defined as a lambda
1365-
/// encoding the body of each work-group to launch.
1366-
///
1367-
/// Lambda may contain multiple calls to parallel_for_work_item(...) methods
1368-
/// representing the execution on each work-item. Launches NumWorkGroups
1369-
/// work-groups of WorkGroupSize size.
1370-
///
1371-
/// \param NumWorkGroups is a range describing the number of work-groups in
1372-
/// each dimension.
1373-
/// \param WorkGroupSize is a range describing the size of work-groups in
1374-
/// each dimension.
/// \param Props is a set of kernel properties; it is forwarded unchanged to
/// wrap_kernel.
1375-
/// \param KernelFunc is a lambda representing the kernel.
1376-
template <
1377-
typename KernelName, typename KernelType, int Dims,
1378-
typename PropertiesT = ext::oneapi::experimental::empty_properties_t>
1379-
void parallel_for_work_group_lambda_impl(range<Dims> NumWorkGroups,
1380-
range<Dims> WorkGroupSize,
1381-
PropertiesT Props,
1382-
const KernelType &KernelFunc) {
// Type produced by lambda_arg_type for this lambda with group<Dims>;
// presumably the lambda's parameter type -- confirm against
// sycl::detail::lambda_arg_type.
1383-
using LambdaArgType =
1384-
sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
// Build the execution range from the product NumWorkGroups * WorkGroupSize
// (first argument) and WorkGroupSize (second argument).
1385-
nd_range<Dims> ExecRange =
1386-
nd_range<Dims>(NumWorkGroups * WorkGroupSize, WorkGroupSize);
// SetNumWorkGroups is left at its default here; no kernel object is
// supplied.
1387-
wrap_kernel<WrapAs::parallel_for_work_group, KernelName, LambdaArgType,
1388-
Dims>(KernelFunc, nullptr /*Kernel*/, Props, ExecRange);
1389-
}
1390-
13911340
#ifdef SYCL_LANGUAGE_VERSION
13921341
#ifndef __INTEL_SYCL_USE_INTEGRATION_HEADERS
13931342
#define __SYCL_KERNEL_ATTR__ [[clang::sycl_kernel_entry_point(KernelName)]]
@@ -1598,13 +1547,17 @@ class __SYCL_EXPORT handler {
15981547
};
15991548

16001549
template <
1601-
WrapAs WrapAsVal, typename KernelName, typename ElementType = void,
1550+
WrapAs WrapAsVal, typename KernelName, typename ElementTypeParam = void,
16021551
int Dims = 1, bool SetNumWorkGroups = false,
16031552
typename PropertiesT = ext::oneapi::experimental::empty_properties_t,
16041553
typename KernelType, typename MaybeKernelTy, typename... RangeParams>
16051554
void wrap_kernel(const KernelType &KernelFunc, MaybeKernelTy &&MaybeKernel,
16061555
const PropertiesT &Props,
16071556
[[maybe_unused]] RangeParams &&...params) {
1557+
using ElementType = std::conditional_t<
1558+
WrapAsVal == WrapAs::parallel_for_work_group,
1559+
sycl::detail::lambda_arg_type<KernelType, group<Dims>>,
1560+
ElementTypeParam>;
16081561
// TODO: Properties may change the kernel function, so in order to avoid
16091562
// conflicts they should be included in the name.
16101563
using NameT =
@@ -1959,9 +1912,10 @@ class __SYCL_EXPORT handler {
19591912
int Dims>
19601913
void parallel_for_work_group(range<Dims> NumWorkGroups,
19611914
const KernelType &KernelFunc) {
1962-
parallel_for_work_group_lambda_impl<KernelName>(
1963-
NumWorkGroups, ext::oneapi::experimental::empty_properties_t{},
1964-
KernelFunc);
1915+
wrap_kernel<WrapAs::parallel_for_work_group, KernelName,
1916+
void /*auto-detect*/, Dims,
1917+
/*SetNumWorkGroups=*/true>(KernelFunc, nullptr /*Kernel*/,
1918+
{} /*Props*/, NumWorkGroups);
19651919
}
19661920

19671921
/// Hierarchical kernel invocation method of a kernel defined as a lambda
@@ -1981,9 +1935,10 @@ class __SYCL_EXPORT handler {
19811935
void parallel_for_work_group(range<Dims> NumWorkGroups,
19821936
range<Dims> WorkGroupSize,
19831937
const KernelType &KernelFunc) {
1984-
parallel_for_work_group_lambda_impl<KernelName>(
1985-
NumWorkGroups, WorkGroupSize,
1986-
ext::oneapi::experimental::empty_properties_t{}, KernelFunc);
1938+
wrap_kernel<WrapAs::parallel_for_work_group, KernelName,
1939+
void /*auto-detect*/, Dims>(
1940+
KernelFunc, nullptr /*Kernel*/, {} /*Props*/,
1941+
nd_range<Dims>{NumWorkGroups * WorkGroupSize, WorkGroupSize});
19871942
}
19881943

19891944
/// Invokes a SYCL kernel.
@@ -2395,9 +2350,10 @@ class __SYCL_EXPORT handler {
23952350
"member function instead.")
23962351
void parallel_for_work_group(range<Dims> NumWorkGroups, PropertiesT Props,
23972352
const KernelType &KernelFunc) {
2398-
parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
2399-
PropertiesT>(NumWorkGroups, Props,
2400-
KernelFunc);
2353+
wrap_kernel<WrapAs::parallel_for_work_group, KernelName,
2354+
void /*auto-detect*/, Dims,
2355+
/*SetNumWorkGroups=*/true>(KernelFunc, nullptr /*Kernel*/,
2356+
Props, NumWorkGroups);
24012357
}
24022358

24032359
template <typename KernelName = detail::auto_name, typename KernelType,
@@ -2409,9 +2365,10 @@ class __SYCL_EXPORT handler {
24092365
void parallel_for_work_group(range<Dims> NumWorkGroups,
24102366
range<Dims> WorkGroupSize, PropertiesT Props,
24112367
const KernelType &KernelFunc) {
2412-
parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
2413-
PropertiesT>(
2414-
NumWorkGroups, WorkGroupSize, Props, KernelFunc);
2368+
wrap_kernel<WrapAs::parallel_for_work_group, KernelName,
2369+
void /*auto-detect*/, Dims>(
2370+
KernelFunc, nullptr /*Kernel*/, Props,
2371+
nd_range<Dims>{NumWorkGroups * WorkGroupSize, WorkGroupSize});
24152372
}
24162373

24172374
// Explicit copy operations API

0 commit comments

Comments
 (0)