diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index e487a1826b982..fc5dbfb3427fa 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -1206,8 +1206,8 @@ class __SYCL_EXPORT handler { using KName = std::conditional_t::value, decltype(Wrapper), NameWT>; - kernel_parallel_for_wrapper(Wrapper); + KernelWrapper::wrap(this, Wrapper); #ifndef __SYCL_DEVICE_ONLY__ verifyUsedKernelBundleInternal( detail::string_view{detail::getKernelName()}); @@ -1232,8 +1232,8 @@ class __SYCL_EXPORT handler { #ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ // If parallel_for range rounding is forced then only range rounded // kernel is generated - kernel_parallel_for_wrapper(KernelFunc); + KernelWrapper::wrap(this, KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ verifyUsedKernelBundleInternal( detail::string_view{detail::getKernelName()}); @@ -1281,8 +1281,8 @@ class __SYCL_EXPORT handler { (void)ExecutionRange; (void)Props; - kernel_parallel_for_wrapper(KernelFunc); + KernelWrapper::wrap(this, KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ throwIfActionIsCreated(); verifyUsedKernelBundleInternal( @@ -1369,8 +1369,8 @@ class __SYCL_EXPORT handler { sycl::detail::lambda_arg_type>; (void)NumWorkGroups; (void)Props; - kernel_parallel_for_work_group_wrapper(KernelFunc); + KernelWrapper::wrap(this, KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ throwIfActionIsCreated(); verifyUsedKernelBundleInternal( @@ -1411,8 +1411,8 @@ class __SYCL_EXPORT handler { (void)NumWorkGroups; (void)WorkGroupSize; (void)Props; - kernel_parallel_for_work_group_wrapper(KernelFunc); + KernelWrapper::wrap(this, KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ throwIfActionIsCreated(); verifyUsedKernelBundleInternal( @@ -1552,127 +1552,79 @@ class __SYCL_EXPORT handler { #endif } - template struct KernelPropertiesUnpackerImpl { - // Just pass extra Props... as template parameters to the underlying - // Caller->* member functions. Don't have reflection so try to use - // templates as much as possible to reduce the amount of boilerplate code - // needed. All the type checks are expected to be done at the Caller's - // methods side. - - template - static void kernel_single_task_unpack(handler *h, ArgsTy &&...Args) { - h->kernel_single_task( - std::forward(Args)...); - } - - template - static void kernel_parallel_for_unpack(handler *h, ArgsTy &&...Args) { - h->kernel_parallel_for( - std::forward(Args)...); - } - - template - static void kernel_parallel_for_work_group_unpack(handler *h, - ArgsTy &&...Args) { - h->kernel_parallel_for_work_group( - std::forward(Args)...); - } - }; - - template - struct KernelPropertiesUnpacker : public KernelPropertiesUnpackerImpl<> { - // This should always fail outside the specialization below but must be - // dependent to avoid failing even if not instantiated. - static_assert( - ext::oneapi::experimental::is_property_list::value, - "Template type is not a property list."); - }; - - template - struct KernelPropertiesUnpacker< - ext::oneapi::experimental::detail::properties_t> - : public KernelPropertiesUnpackerImpl {}; - - // Helper function to - // - // * Make use of the KernelPropertiesUnpacker above - // * Decide if we need an extra kernel_handler parameter + // The KernelWrapper below has two purposes. // - // The interface uses a \p Lambda callback to propagate that information back - // to the caller as we need the caller to communicate: + // First, from SYCL 2020, Table 129 (Member functions of the `handler ` class) + // > The callable ... can optionally take a `kernel_handler` ... in + // which > case the SYCL runtime will construct an instance of + // `kernel_handler` > and pass it to the callable. // - // * Name of the method to call - // * Provide explicit template type parameters for the call + // Note: "..." due to slight wording variability between + // single_task/parallel_for (e.g. only parameter vs last). This helper class + // calls `kernel_*` entry points (both hardcoded names known to FE and special + // device-specific entry point attributes) with proper arguments (with/without + // `kernel_handler` argument, depending on the signature of the SYCL kernel + // function). // - // Couldn't think of a better way to achieve both. - template - void unpack(const KernelType &KernelFunc, FuncTy Lambda) { -#ifdef __SYCL_DEVICE_ONLY__ - detail::CheckDeviceCopyable(); -#endif // __SYCL_DEVICE_ONLY__ - using MergedPropertiesT = - typename detail::GetMergedKernelProperties::type; - using Unpacker = KernelPropertiesUnpacker; -#ifndef __SYCL_DEVICE_ONLY__ - // If there are properties provided by get method then process them. - if constexpr (ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod::value) { - processProperties()>( - KernelFunc.get(ext::oneapi::experimental::properties_tag{})); - } -#endif - if constexpr (HasKernelHandlerArg) { - kernel_handler KH; - Lambda(Unpacker{}, this, KernelFunc, KH); - } else { - Lambda(Unpacker{}, this, KernelFunc); - } - } + // Second, it performs a few checks and some properties processing (including + // the one provided via `sycl_ext_oneapi_kernel_properties` extension by + // embedding them into the kernel's type). - // NOTE: to support kernel_handler argument in kernel lambdas, only - // kernel_***_wrapper functions must be called in this code + enum class WrapAs { single_task, parallel_for, parallel_for_work_group }; template < - typename KernelName, typename KernelType, - typename PropertiesT = ext::oneapi::experimental::empty_properties_t> - void kernel_single_task_wrapper(const KernelType &KernelFunc) { - unpack::value>( - KernelFunc, [&](auto Unpacker, auto &&...args) { - Unpacker.template kernel_single_task_unpack( + WrapAs WrapAsVal, typename KernelName, typename KernelType, + typename ElementType, + typename PropertiesT = ext::oneapi::experimental::empty_properties_t, + typename MergedPropertiesT = typename detail::GetMergedKernelProperties< + KernelType, PropertiesT>::type> + struct KernelWrapper; + template + struct KernelWrapper< + WrapAsVal, KernelName, KernelType, ElementType, PropertiesT, + ext::oneapi::experimental::detail::properties_t> { + static void wrap(handler *h, const KernelType &KernelFunc) { +#ifdef __SYCL_DEVICE_ONLY__ + detail::CheckDeviceCopyable(); +#else + // If there are properties provided by get method then process them. + if constexpr (ext::oneapi::experimental::detail:: + HasKernelPropertiesGetMethod< + const KernelType &>::value) { + h->processProperties()>( + KernelFunc.get(ext::oneapi::experimental::properties_tag{})); + } +#endif + auto L = [&](auto &&...args) { + if constexpr (WrapAsVal == WrapAs::single_task) { + h->kernel_single_task( std::forward(args)...); - }); - } - - template < - typename KernelName, typename ElementType, typename KernelType, - typename PropertiesT = ext::oneapi::experimental::empty_properties_t> - void kernel_parallel_for_wrapper(const KernelType &KernelFunc) { - unpack::value>( - KernelFunc, [&](auto Unpacker, auto &&...args) { - Unpacker.template kernel_parallel_for_unpack( + } else if constexpr (WrapAsVal == WrapAs::parallel_for) { + h->kernel_parallel_for( std::forward(args)...); - }); - } - - template < - typename KernelName, typename ElementType, typename KernelType, - typename PropertiesT = ext::oneapi::experimental::empty_properties_t> - void kernel_parallel_for_work_group_wrapper(const KernelType &KernelFunc) { - unpack::value>( - KernelFunc, [&](auto Unpacker, auto &&...args) { - Unpacker.template kernel_parallel_for_work_group_unpack< - KernelName, ElementType, KernelType>( + } else if constexpr (WrapAsVal == WrapAs::parallel_for_work_group) { + h->kernel_parallel_for_work_group( std::forward(args)...); - }); - } + } else { + // Always false, but template-dependent. + static_assert(WrapAsVal != WrapAsVal, "Unexpected WrapAsVal"); + } + }; + if constexpr (detail::KernelLambdaHasKernelHandlerArgT< + KernelType, ElementType>::value) { + kernel_handler KH; + L(KernelFunc, KH); + } else { + L(KernelFunc); + } + } + }; + + // NOTE: to support kernel_handler argument in kernel lambdas, only + // KernelWrapper<...>::wrap() must be called in this code. /// Defines and invokes a SYCL kernel function as a function object type. /// @@ -1692,7 +1644,8 @@ class __SYCL_EXPORT handler { using NameT = typename detail::get_kernel_name_t::name; - kernel_single_task_wrapper(KernelFunc); + KernelWrapper::wrap(this, KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ throwIfActionIsCreated(); throwOnKernelParameterMisuse(); @@ -1995,7 +1948,8 @@ class __SYCL_EXPORT handler { typename TransformUserItemType::type>; (void)NumWorkItems; (void)WorkItemOffset; - kernel_parallel_for_wrapper(KernelFunc); + KernelWrapper::wrap(this, KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ throwIfActionIsCreated(); verifyUsedKernelBundleInternal( @@ -2171,7 +2125,8 @@ class __SYCL_EXPORT handler { using LambdaArgType = sycl::detail::lambda_arg_type>; (void)Kernel; (void)NumWorkItems; - kernel_parallel_for_wrapper(KernelFunc); + KernelWrapper::wrap( + this, KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ throwIfActionIsCreated(); verifyUsedKernelBundleInternal( @@ -2209,7 +2164,8 @@ class __SYCL_EXPORT handler { (void)Kernel; (void)NumWorkItems; (void)WorkItemOffset; - kernel_parallel_for_wrapper(KernelFunc); + KernelWrapper::wrap( + this, KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ throwIfActionIsCreated(); // Ignore any set kernel bundles and use the one associated with the kernel @@ -2248,7 +2204,8 @@ class __SYCL_EXPORT handler { sycl::detail::lambda_arg_type>; (void)Kernel; (void)NDRange; - kernel_parallel_for_wrapper(KernelFunc); + KernelWrapper::wrap( + this, KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ throwIfActionIsCreated(); // Ignore any set kernel bundles and use the one associated with the kernel @@ -2291,7 +2248,8 @@ class __SYCL_EXPORT handler { sycl::detail::lambda_arg_type>; (void)Kernel; (void)NumWorkGroups; - kernel_parallel_for_work_group_wrapper(KernelFunc); + KernelWrapper::wrap(this, KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ throwIfActionIsCreated(); // Ignore any set kernel bundles and use the one associated with the kernel @@ -2333,7 +2291,8 @@ class __SYCL_EXPORT handler { (void)Kernel; (void)NumWorkGroups; (void)WorkGroupSize; - kernel_parallel_for_work_group_wrapper(KernelFunc); + KernelWrapper::wrap(this, KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ throwIfActionIsCreated(); // Ignore any set kernel bundles and use the one associated with the kernel