diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 758daa3a81a9b..4fef8be3da705 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -2292,49 +2292,64 @@ class __SYCL_EXPORT handler { template - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value> - single_task(PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) { + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::single_task (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t::value> single_task(PropertiesT Props, + _KERNELFUNCPARAM(KernelFunc)) { single_task_lambda_impl(Props, KernelFunc); } template - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value> - parallel_for(range<1> NumWorkItems, PropertiesT Props, - _KERNELFUNCPARAM(KernelFunc)) { + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::single_task (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t::value> parallel_for(range<1> NumWorkItems, + PropertiesT Props, + _KERNELFUNCPARAM(KernelFunc)) { parallel_for_lambda_impl( NumWorkItems, Props, std::move(KernelFunc)); } template - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value> - parallel_for(range<2> NumWorkItems, PropertiesT Props, - _KERNELFUNCPARAM(KernelFunc)) { + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::single_task (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t::value> parallel_for(range<2> NumWorkItems, + PropertiesT Props, + _KERNELFUNCPARAM(KernelFunc)) { parallel_for_lambda_impl( NumWorkItems, Props, std::move(KernelFunc)); } template - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value> - parallel_for(range<3> NumWorkItems, PropertiesT Props, - _KERNELFUNCPARAM(KernelFunc)) { + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::single_task (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t::value> parallel_for(range<3> NumWorkItems, + PropertiesT Props, + _KERNELFUNCPARAM(KernelFunc)) { parallel_for_lambda_impl( NumWorkItems, Props, std::move(KernelFunc)); } template - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value> - parallel_for(nd_range Range, PropertiesT Properties, - _KERNELFUNCPARAM(KernelFunc)) { + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::single_task (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t::value> parallel_for(nd_range Range, + PropertiesT Properties, + _KERNELFUNCPARAM(KernelFunc)) { parallel_for_impl(Range, Properties, std::move(KernelFunc)); } @@ -2342,11 +2357,15 @@ class __SYCL_EXPORT handler { template - std::enable_if_t< - (sizeof...(RestT) > 1) && - detail::AreAllButLastReductions::value && - ext::oneapi::experimental::is_property_list::value> - parallel_for(range<1> Range, PropertiesT Properties, RestT &&...Rest) { + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t<(sizeof...(RestT) > 1) && + detail::AreAllButLastReductions::value && + ext::oneapi::experimental::is_property_list< + PropertiesT>::value> parallel_for(range<1> Range, + PropertiesT Properties, + RestT &&...Rest) { #ifndef __SYCL_DEVICE_ONLY__ throwIfGraphAssociated(); @@ -2357,11 +2376,15 @@ class __SYCL_EXPORT handler { template - std::enable_if_t< - (sizeof...(RestT) > 1) && - detail::AreAllButLastReductions::value && - ext::oneapi::experimental::is_property_list::value> - parallel_for(range<2> Range, PropertiesT Properties, RestT &&...Rest) { + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t<(sizeof...(RestT) > 1) && + detail::AreAllButLastReductions::value && + ext::oneapi::experimental::is_property_list< + PropertiesT>::value> parallel_for(range<2> Range, + PropertiesT Properties, + RestT &&...Rest) { #ifndef __SYCL_DEVICE_ONLY__ throwIfGraphAssociated(); @@ -2372,11 +2395,15 @@ class __SYCL_EXPORT handler { template - std::enable_if_t< - (sizeof...(RestT) > 1) && - detail::AreAllButLastReductions::value && - ext::oneapi::experimental::is_property_list::value> - parallel_for(range<3> Range, PropertiesT Properties, RestT &&...Rest) { + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t<(sizeof...(RestT) > 1) && + detail::AreAllButLastReductions::value && + ext::oneapi::experimental::is_property_list< + PropertiesT>::value> parallel_for(range<3> Range, + PropertiesT Properties, + RestT &&...Rest) { #ifndef __SYCL_DEVICE_ONLY__ throwIfGraphAssociated(); @@ -2411,11 +2438,15 @@ class __SYCL_EXPORT handler { template - std::enable_if_t< - (sizeof...(RestT) > 1) && - detail::AreAllButLastReductions::value && - ext::oneapi::experimental::is_property_list::value> - parallel_for(nd_range Range, PropertiesT Properties, RestT &&...Rest) { + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t<(sizeof...(RestT) > 1) && + detail::AreAllButLastReductions::value && + ext::oneapi::experimental::is_property_list< + PropertiesT>::value> parallel_for(nd_range Range, + PropertiesT Properties, + RestT &&...Rest) { #ifndef __SYCL_DEVICE_ONLY__ throwIfGraphAssociated(); @@ -2437,6 +2468,9 @@ class __SYCL_EXPORT handler { template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") void parallel_for_work_group(range NumWorkGroups, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) { parallel_for_work_group_lambda_impl + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") void parallel_for_work_group(range NumWorkGroups, range WorkGroupSize, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) { diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index e2208d452d100..f04b8a1e88960 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -2103,11 +2103,14 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// \param CodeLoc contains the code location of user code template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::single_task (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, event> - single_task( - PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), - const detail::code_location &CodeLoc = detail::code_location::current()) { + ext::oneapi::experimental::is_property_list::value, + event> single_task(PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), + const detail::code_location &CodeLoc = + detail::code_location::current()) { static_assert( (detail::check_fn_signature, void()>::value || @@ -2145,11 +2148,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// \param CodeLoc contains the code location of user code template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::single_task (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, event> - single_task( - event DepEvent, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), - const detail::code_location &CodeLoc = detail::code_location::current()) { + ext::oneapi::experimental::is_property_list::value, + event> single_task(event DepEvent, PropertiesT Properties, + _KERNELFUNCPARAM(KernelFunc), + const detail::code_location &CodeLoc = + detail::code_location::current()) { static_assert( (detail::check_fn_signature, void()>::value || @@ -2191,12 +2198,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// \param CodeLoc contains the code location of user code template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::single_task (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, event> - single_task( - const std::vector &DepEvents, PropertiesT Properties, - _KERNELFUNCPARAM(KernelFunc), - const detail::code_location &CodeLoc = detail::code_location::current()) { + ext::oneapi::experimental::is_property_list::value, + event> single_task(const std::vector &DepEvents, + PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), + const detail::code_location &CodeLoc = + detail::code_location::current()) { static_assert( (detail::check_fn_signature, void()>::value || @@ -2230,6 +2240,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { CodeLoc); } + /// parallel_for version with a kernel represented as a lambda + range that + /// specifies global size only. + /// + /// \param Range specifies the global work space of the kernel + /// \param Properties is the kernel properties. + /// \param Rest acts as-if: "ReductionTypes&&... Reductions, + /// const KernelType &KernelFunc". + template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value, + event> parallel_for(range<1> Range, PropertiesT Properties, + RestT &&...Rest) { + return parallel_for_impl(Range, Properties, Rest...); + } + /// parallel_for version with a kernel represented as a lambda + range that /// specifies global size only. /// @@ -2241,6 +2270,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { return parallel_for_impl(Range, Rest...); } + /// parallel_for version with a kernel represented as a lambda + range that + /// specifies global size only. + /// + /// \param Range specifies the global work space of the kernel + /// \param Properties is the kernel properties. + /// \param Rest acts as-if: "ReductionTypes&&... Reductions, + /// const KernelType &KernelFunc". + template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value, + event> parallel_for(range<2> Range, PropertiesT Properties, + RestT &&...Rest) { + return parallel_for_impl(Range, Properties, Rest...); + } + /// parallel_for version with a kernel represented as a lambda + range that /// specifies global size only. /// @@ -2252,6 +2300,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { return parallel_for_impl(Range, Rest...); } + /// parallel_for version with a kernel represented as a lambda + range that + /// specifies global size only. + /// + /// \param Range specifies the global work space of the kernel + /// \param Properties is the kernel properties. + /// \param Rest acts as-if: "ReductionTypes&&... Reductions, + /// const KernelType &KernelFunc". + template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value, + event> parallel_for(range<3> Range, PropertiesT Properties, + RestT &&...Rest) { + return parallel_for_impl(Range, Properties, Rest...); + } + /// parallel_for version with a kernel represented as a lambda + range that /// specifies global size only. /// @@ -2263,6 +2330,26 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { return parallel_for_impl(Range, Rest...); } + /// parallel_for version with a kernel represented as a lambda + range that + /// specifies global size only. + /// + /// \param Range specifies the global work space of the kernel + /// \param DepEvent is an event that specifies the kernel dependencies + /// \param Properties is the kernel properties. + /// \param Rest acts as-if: "ReductionTypes&&... Reductions, + /// const KernelType &KernelFunc". + template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value, + event> parallel_for(range<1> Range, event DepEvent, + PropertiesT Properties, RestT &&...Rest) { + return parallel_for_impl(Range, DepEvent, Properties, Rest...); + } + /// parallel_for version with a kernel represented as a lambda + range that /// specifies global size only. /// @@ -2275,6 +2362,26 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { return parallel_for_impl(Range, DepEvent, Rest...); } + /// parallel_for version with a kernel represented as a lambda + range that + /// specifies global size only. + /// + /// \param Range specifies the global work space of the kernel + /// \param DepEvent is an event that specifies the kernel dependencies + /// \param Properties is the kernel properties. + /// \param Rest acts as-if: "ReductionTypes&&... Reductions, + /// const KernelType &KernelFunc". + template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value, + event> parallel_for(range<2> Range, event DepEvent, + PropertiesT Properties, RestT &&...Rest) { + return parallel_for_impl(Range, DepEvent, Properties, Rest...); + } + /// parallel_for version with a kernel represented as a lambda + range that /// specifies global size only. /// @@ -2287,6 +2394,26 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { return parallel_for_impl(Range, DepEvent, Rest...); } + /// parallel_for version with a kernel represented as a lambda + range that + /// specifies global size only. + /// + /// \param Range specifies the global work space of the kernel + /// \param DepEvent is an event that specifies the kernel dependencies + /// \param Properties is the kernel properties. + /// \param Rest acts as-if: "ReductionTypes&&... Reductions, + /// const KernelType &KernelFunc". + template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value, + event> parallel_for(range<3> Range, event DepEvent, + PropertiesT Properties, RestT &&...Rest) { + return parallel_for_impl(Range, DepEvent, Properties, Rest...); + } + /// parallel_for version with a kernel represented as a lambda + range that /// specifies global size only. /// @@ -2299,6 +2426,27 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { return parallel_for_impl(Range, DepEvent, Rest...); } + /// parallel_for version with a kernel represented as a lambda + range that + /// specifies global size only. + /// + /// \param Range specifies the global work space of the kernel + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies + /// \param Properties is the kernel properties. + /// \param Rest acts as-if: "ReductionTypes&&... Reductions, + /// const KernelType &KernelFunc". + template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value, + event> parallel_for(range<1> Range, const std::vector &DepEvents, + PropertiesT Properties, RestT &&...Rest) { + return parallel_for_impl(Range, DepEvents, Properties, Rest...); + } + /// parallel_for version with a kernel represented as a lambda + range that /// specifies global size only. /// @@ -2313,6 +2461,27 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { return parallel_for_impl(Range, DepEvents, Rest...); } + /// parallel_for version with a kernel represented as a lambda + range that + /// specifies global size only. + /// + /// \param Range specifies the global work space of the kernel + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies + /// \param Properties is the kernel properties. + /// \param Rest acts as-if: "ReductionTypes&&... Reductions, + /// const KernelType &KernelFunc". + template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value, + event> parallel_for(range<2> Range, const std::vector &DepEvents, + PropertiesT Properties, RestT &&...Rest) { + return parallel_for_impl(Range, DepEvents, Properties, Rest...); + } + /// parallel_for version with a kernel represented as a lambda + range that /// specifies global size only. /// @@ -2327,6 +2496,27 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { return parallel_for_impl(Range, DepEvents, Rest...); } + /// parallel_for version with a kernel represented as a lambda + range that + /// specifies global size only. + /// + /// \param Range specifies the global work space of the kernel + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies + /// \param Properties is the kernel properties. + /// \param Rest acts as-if: "ReductionTypes&&... Reductions, + /// const KernelType &KernelFunc". + template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value, + event> parallel_for(range<3> Range, const std::vector &DepEvents, + PropertiesT Properties, RestT &&...Rest) { + return parallel_for_impl(Range, DepEvents, Properties, Rest...); + } + /// parallel_for version with a kernel represented as a lambda + range that /// specifies global size only. /// @@ -2442,11 +2632,14 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// const KernelType &KernelFunc". template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") std::enable_if_t< detail::AreAllButLastReductions::value && ext::oneapi::experimental::is_property_list::value, - event> - parallel_for(nd_range Range, PropertiesT Properties, RestT &&...Rest) { + event> parallel_for(nd_range Range, PropertiesT Properties, + RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( @@ -2466,8 +2659,41 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { typename... RestT> std::enable_if_t::value, event> parallel_for(nd_range Range, RestT &&...Rest) { - return parallel_for( - Range, ext::oneapi::experimental::empty_properties_t{}, Rest...); + constexpr detail::code_location CodeLoc = getCodeLocation(); + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.template parallel_for(Range, Rest...); + }, + TlsCodeLocCapture.query()); + } + + /// parallel_for version with a kernel represented as a lambda + nd_range that + /// specifies global, local sizes and offset. + /// + /// \param Range specifies the global and local work spaces of the kernel + /// \param DepEvent is an event that specifies the kernel dependencies + /// \param Properties is the kernel properties. + /// \param Rest acts as-if: "ReductionTypes&&... Reductions, + /// const KernelType &KernelFunc". + template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t< + detail::AreAllButLastReductions::value && + ext::oneapi::experimental::is_property_list::value, + event> parallel_for(nd_range Range, event DepEvent, + PropertiesT Properties, RestT &&...Rest) { + constexpr detail::code_location CodeLoc = getCodeLocation(); + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvent); + CGH.template parallel_for(Range, Properties, Rest...); + }, + TlsCodeLocCapture.query()); } /// parallel_for version with a kernel represented as a lambda + nd_range that @@ -2479,7 +2705,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// const KernelType &KernelFunc". template - event parallel_for(nd_range Range, event DepEvent, RestT &&...Rest) { + std::enable_if_t::value, event> + parallel_for(nd_range Range, event DepEvent, RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( @@ -2490,6 +2717,36 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { TlsCodeLocCapture.query()); } + /// parallel_for version with a kernel represented as a lambda + nd_range that + /// specifies global, local sizes and offset. + /// + /// \param Range specifies the global and local work spaces of the kernel + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies + /// \param Properties is the kernel properties. + /// \param Rest acts as-if: "ReductionTypes&&... Reductions, + /// const KernelType &KernelFunc". + template + __SYCL_DEPRECATED( + "Use sycl::ext::oneapi::experimental::parallel_for (provided in the " + "sycl_ext_oneapi_enqueue_functions extension) instead.") + std::enable_if_t< + detail::AreAllButLastReductions::value && + ext::oneapi::experimental::is_property_list::value, + event> parallel_for(nd_range Range, + const std::vector &DepEvents, + PropertiesT Properties, RestT &&...Rest) { + constexpr detail::code_location CodeLoc = getCodeLocation(); + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvents); + CGH.template parallel_for(Range, Properties, Rest...); + }, + TlsCodeLocCapture.query()); + } + /// parallel_for version with a kernel represented as a lambda + nd_range that /// specifies global, local sizes and offset. /// @@ -2500,8 +2757,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// const KernelType &KernelFunc". template - event parallel_for(nd_range Range, const std::vector &DepEvents, - RestT &&...Rest) { + std::enable_if_t::value, event> + parallel_for(nd_range Range, const std::vector &DepEvents, + RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( diff --git a/sycl/test-e2e/Basic/kernel_max_wg_size.cpp b/sycl/test-e2e/Basic/kernel_max_wg_size.cpp index 55c993734df6a..eb0ff5483bae4 100644 --- a/sycl/test-e2e/Basic/kernel_max_wg_size.cpp +++ b/sycl/test-e2e/Basic/kernel_max_wg_size.cpp @@ -29,6 +29,16 @@ __attribute__((noinline)) void f(int *result, nd_item<1> &index) { result[index.get_global_id()] = index.get_global_id(); } +struct KernelFunctor { + int *mResult; + KernelFunctor(int *result) : mResult(result) {} + + void operator()(nd_item<1> index) const { f(mResult, index); } + auto get(syclex::properties_tag) const { + return syclex::properties{intelex::grf_size<256>}; + } +}; + int main() { queue myQueue; auto myContext = myQueue.get_context(); @@ -46,11 +56,9 @@ int main() { nd_range myRange{range{maxWgSize}, range{maxWgSize}}; int *result = sycl::malloc_shared(maxWgSize, myQueue); - syclex::properties kernelProperties{intelex::grf_size<256>}; myQueue.submit([&](handler &cgh) { cgh.use_kernel_bundle(myBundle); - cgh.parallel_for(myRange, kernelProperties, - ([=](nd_item<1> index) { f(result, index); })); + cgh.parallel_for(myRange, KernelFunctor(result)); }); myQueue.wait(); diff --git a/sycl/test-e2e/Basic/sub_group_size_prop.cpp b/sycl/test-e2e/Basic/sub_group_size_prop.cpp index 6da86acd09c45..ae8281903a92b 100644 --- a/sycl/test-e2e/Basic/sub_group_size_prop.cpp +++ b/sycl/test-e2e/Basic/sub_group_size_prop.cpp @@ -44,33 +44,12 @@ void test(queue &Queue, const std::vector SupportedSGSizes) { return; } - auto Props = ext::oneapi::experimental::properties{ - ext::oneapi::experimental::sub_group_size}; - nd_range<1> NdRange(SGSize * 4, SGSize * 2); size_t ReadSubGroupSize = 0; { buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1)); - Queue.submit([&](handler &CGH) { - accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH, - sycl::write_only, sycl::no_init}; - - CGH.parallel_for>( - NdRange, Props, [=](nd_item<1> NdItem) { - auto SG = NdItem.get_sub_group(); - if (NdItem.get_global_linear_id() == 0) - ReadSubGroupSizeBufAcc[0] = SG.get_local_linear_range(); - }); - }); - } - assert(ReadSubGroupSize == SGSize && "Failed check for function."); - - ReadSubGroupSize = 0; - { - buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1)); - Queue.submit([&](handler &CGH) { accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH, sycl::write_only, sycl::no_init}; @@ -81,22 +60,6 @@ void test(queue &Queue, const std::vector SupportedSGSizes) { }); } assert(ReadSubGroupSize == SGSize && "Failed check for functor."); - - ReadSubGroupSize = 0; - { - buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1)); - - Queue.submit([&](handler &CGH) { - accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH, - sycl::write_only, sycl::no_init}; - KernelFunctorWithSGSizeProp KernelFunctor{ReadSubGroupSizeBufAcc}; - - CGH.parallel_for>(NdRange, Props, - KernelFunctor); - }); - } - assert(ReadSubGroupSize == SGSize && - "Failed check for functor and properties."); } int main() { diff --git a/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp b/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp index e37d4ea1f1fb3..01db70b11464a 100644 --- a/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp +++ b/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp @@ -10,6 +10,49 @@ #include +template struct KernelFunctor { + int *mCorrectResultFlag; + T mClusterLaunchProperty; + sycl::range mClusterRange; + KernelFunctor(int *CorrectResultFlag, T ClusterLaunchProperty, + sycl::range ClusterRange) + : mCorrectResultFlag(CorrectResultFlag), + mClusterLaunchProperty(ClusterLaunchProperty), + mClusterRange(ClusterRange) {} + + void operator()(sycl::nd_item It) const { + uint32_t ClusterDimX, ClusterDimY, ClusterDimZ; +// Temporary solution till cluster group class is implemented +#if defined(__SYCL_DEVICE_ONLY__) && defined(__SYCL_CUDA_ARCH__) && \ + (__SYCL_CUDA_ARCH__ >= 900) + asm volatile("\n\t" + "mov.u32 %0, %%cluster_nctaid.x; \n\t" + "mov.u32 %1, %%cluster_nctaid.y; \n\t" + "mov.u32 %2, %%cluster_nctaid.z; \n\t" + : "=r"(ClusterDimZ), "=r"(ClusterDimY), "=r"(ClusterDimX)); +#endif + if constexpr (Dim == 1) { + if (ClusterDimZ == mClusterRange[0] && ClusterDimY == 1 && + ClusterDimX == 1) { + *mCorrectResultFlag = 1; + } + } else if constexpr (Dim == 2) { + if (ClusterDimZ == mClusterRange[1] && ClusterDimY == mClusterRange[0] && + ClusterDimX == 1) { + *mCorrectResultFlag = 1; + } + } else { + if (ClusterDimZ == mClusterRange[2] && ClusterDimY == mClusterRange[1] && + ClusterDimX == mClusterRange[0]) { + *mCorrectResultFlag = 1; + } + } + } + auto get(sycl::ext::oneapi::experimental::properties_tag) const { + return mClusterLaunchProperty; + } +}; + template int test_cluster_launch_parallel_for(sycl::queue &Queue, sycl::range GlobalRange, @@ -25,38 +68,10 @@ int test_cluster_launch_parallel_for(sycl::queue &Queue, Queue .submit([&](sycl::handler &CGH) { - CGH.parallel_for(sycl::nd_range(GlobalRange, LocalRange), - ClusterLaunchProperty, [=](sycl::nd_item It) { - uint32_t ClusterDimX, ClusterDimY, ClusterDimZ; -// Temporary solution till cluster group class is implemented -#if defined(__SYCL_DEVICE_ONLY__) && defined(__SYCL_CUDA_ARCH__) && \ - (__SYCL_CUDA_ARCH__ >= 900) - asm volatile("\n\t" - "mov.u32 %0, %%cluster_nctaid.x; \n\t" - "mov.u32 %1, %%cluster_nctaid.y; \n\t" - "mov.u32 %2, %%cluster_nctaid.z; \n\t" - : "=r"(ClusterDimZ), "=r"(ClusterDimY), - "=r"(ClusterDimX)); -#endif - if constexpr (Dim == 1) { - if (ClusterDimZ == ClusterRange[0] && - ClusterDimY == 1 && ClusterDimX == 1) { - *CorrectResultFlag = 1; - } - } else if constexpr (Dim == 2) { - if (ClusterDimZ == ClusterRange[1] && - ClusterDimY == ClusterRange[0] && - ClusterDimX == 1) { - *CorrectResultFlag = 1; - } - } else { - if (ClusterDimZ == ClusterRange[2] && - ClusterDimY == ClusterRange[1] && - ClusterDimX == ClusterRange[0]) { - *CorrectResultFlag = 1; - } - } - }); + CGH.parallel_for( + sycl::nd_range(GlobalRange, LocalRange), + KernelFunctor( + CorrectResultFlag, ClusterLaunchProperty, ClusterRange)); }) .wait_and_throw(); diff --git a/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp b/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp index 8900d10328871..0460defa72104 100644 --- a/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp +++ b/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp @@ -24,6 +24,22 @@ template void dummy_kernel(T *Input, int N, sycl::nd_item<1> It) { #endif } +template struct KernelFunctor { + T1 mAcc; + T2 mClusterLaunchProperty; + KernelFunctor(T2 ClusterLaunchProperty, T1 Acc) + : mClusterLaunchProperty(ClusterLaunchProperty), mAcc(Acc) {} + + void operator()(sycl::nd_item<1> It) const { + dummy_kernel( + mAcc.template get_multi_ptr().get(), 4096, + It); + } + auto get(sycl::ext::oneapi::experimental::properties_tag) const { + return mClusterLaunchProperty; + } +}; + int main() { std::vector HostArray(4096, -20); @@ -46,13 +62,8 @@ int main() { cuda::cluster_size ClusterDims(sycl::range{2}); properties ClusterLaunchProperty{ClusterDims}; auto Acc = Buff.template get_access(CGH); - CGH.parallel_for( - sycl::nd_range({4096}, {32}), ClusterLaunchProperty, - [=](sycl::nd_item<1> It) { - dummy_kernel( - Acc.get_multi_ptr().get(), 4096, - It); - }); + CGH.parallel_for(sycl::nd_range({4096}, {32}), + KernelFunctor(ClusterLaunchProperty, Acc)); }); Queue.submit([&](sycl::handler &CGH) { auto Acc = Buff.template get_access(CGH); diff --git a/sycl/test-e2e/DeviceCodeSplit/grf.cpp b/sycl/test-e2e/DeviceCodeSplit/grf.cpp index 62f1a76a5f017..4080049f665af 100644 --- a/sycl/test-e2e/DeviceCodeSplit/grf.cpp +++ b/sycl/test-e2e/DeviceCodeSplit/grf.cpp @@ -67,6 +67,15 @@ bool checkResult(const std::vector &A, int Inc) { return true; } +template struct KernelFunctor { + T1 mPA; + T2 mProp; + KernelFunctor(T1 PA, T2 Prop) : mPA(PA), mProp(Prop) {} + + void operator()(id<1> i) const { mPA[i] += 2; } + auto get(properties_tag) const { return mProp; } +}; + int main(void) { constexpr unsigned Size = 32; constexpr unsigned VL = 16; @@ -122,8 +131,8 @@ int main(void) { auto e = q.submit([&](handler &cgh) { auto PA = bufa.get_access(cgh); - cgh.parallel_for( - Size, prop, [=](id<1> i) { PA[i] += 2; }); + cgh.parallel_for(Size, + KernelFunctor(PA, prop)); }); e.wait(); } catch (sycl::exception const &e) { diff --git a/sycl/test-e2e/Graph/Inputs/sub_group_prop.cpp b/sycl/test-e2e/Graph/Inputs/sub_group_prop.cpp index 7c0bfe5161530..adaf6e1977ea4 100644 --- a/sycl/test-e2e/Graph/Inputs/sub_group_prop.cpp +++ b/sycl/test-e2e/Graph/Inputs/sub_group_prop.cpp @@ -39,9 +39,6 @@ void test(queue &Queue, const std::vector SupportedSGSizes) { return; } - auto Props = ext::oneapi::experimental::properties{ - ext::oneapi::experimental::sub_group_size}; - nd_range<1> NdRange(SGSize * 4, SGSize * 2); size_t ReadSubGroupSize = 0; @@ -49,39 +46,6 @@ void test(queue &Queue, const std::vector SupportedSGSizes) { buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1)); ReadSubGroupSizeBuf.set_write_back(false); - { - exp_ext::command_graph Graph{ - Queue.get_context(), - Queue.get_device(), - {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; - - add_node(Graph, Queue, [&](handler &CGH) { - accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH, - sycl::write_only, sycl::no_init}; - - CGH.parallel_for>( - NdRange, Props, [=](nd_item<1> NdItem) { - auto SG = NdItem.get_sub_group(); - if (NdItem.get_global_linear_id() == 0) - ReadSubGroupSizeBufAcc[0] = SG.get_local_linear_range(); - }); - }); - - auto ExecGraph = Graph.finalize(); - Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }); - Queue.wait_and_throw(); - } - - host_accessor HostAcc(ReadSubGroupSizeBuf); - ReadSubGroupSize = HostAcc[0]; - } - assert(ReadSubGroupSize == SGSize && "Failed check for function."); - - ReadSubGroupSize = 0; - { - buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1)); - ReadSubGroupSizeBuf.set_write_back(false); - { exp_ext::command_graph Graph{ Queue.get_context(), @@ -107,38 +71,6 @@ void test(queue &Queue, const std::vector SupportedSGSizes) { ReadSubGroupSize = HostAcc[0]; } assert(ReadSubGroupSize == SGSize && "Failed check for functor."); - - ReadSubGroupSize = 0; - { - buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1)); - ReadSubGroupSizeBuf.set_write_back(false); - - { - exp_ext::command_graph Graph{ - Queue.get_context(), - Queue.get_device(), - {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; - - add_node(Graph, Queue, [&](handler &CGH) { - accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH, - sycl::write_only, sycl::no_init}; - KernelFunctorWithSGSizeProp KernelFunctor{ - ReadSubGroupSizeBufAcc}; - - CGH.parallel_for>( - NdRange, Props, KernelFunctor); - }); - - auto ExecGraph = Graph.finalize(); - Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }); - Queue.wait_and_throw(); - } - - host_accessor HostAcc(ReadSubGroupSizeBuf); - ReadSubGroupSize = HostAcc[0]; - } - assert(ReadSubGroupSize == SGSize && - "Failed check for functor and properties."); } int main() { diff --git a/sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp b/sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp index f01a25d4179f4..48db619d94081 100644 --- a/sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp +++ b/sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp @@ -101,6 +101,24 @@ class MultiplyOp : public BaseOp { } }; +template struct KernelFunctor { + T1 mDeviceStorage; + T2 mDataAcc; + T3 mLocalAcc; + KernelFunctor(T1 DeviceStorage, T2 DataAcc, T3 LocalAcc) + : mDeviceStorage(DeviceStorage), mDataAcc(DataAcc), mLocalAcc(LocalAcc) {} + + void operator()(sycl::nd_item<1> It) const { + auto *Ptr = mDeviceStorage->template getAs(); + mDataAcc[It.get_global_id()] = Ptr->apply( + mLocalAcc.template get_multi_ptr().get(), + It.get_group()); + } + auto get(oneapi::properties_tag) const { + return oneapi::properties{oneapi::assume_indirect_calls}; + } +}; + int main() try { using storage_t = obj_storage_t; @@ -113,7 +131,6 @@ int main() try { sycl::range G{16}; sycl::range L{4}; - constexpr oneapi::properties props{oneapi::assume_indirect_calls}; for (unsigned TestCase = 0; TestCase < 2; ++TestCase) { sycl::buffer DataStorage(G); @@ -126,12 +143,8 @@ int main() try { q.submit([&](sycl::handler &CGH) { sycl::accessor DataAcc(DataStorage, CGH, sycl::read_write); sycl::local_accessor LocalAcc(L, CGH); - CGH.parallel_for(sycl::nd_range{G, L}, props, [=](auto It) { - auto *Ptr = DeviceStorage->getAs(); - DataAcc[It.get_global_id()] = Ptr->apply( - LocalAcc.get_multi_ptr().get(), - It.get_group()); - }); + CGH.parallel_for(sycl::nd_range{G, L}, + KernelFunctor(DeviceStorage, DataAcc, LocalAcc)); }).wait_and_throw(); auto *Ptr = HostStorage.construct(TestCase); diff --git a/sycl/test-e2e/VirtualFunctions/misc/range-non-uniform-vf-2.cpp b/sycl/test-e2e/VirtualFunctions/misc/range-non-uniform-vf-2.cpp index 45b56916a5c1d..c62e65c7b9d69 100644 --- a/sycl/test-e2e/VirtualFunctions/misc/range-non-uniform-vf-2.cpp +++ b/sycl/test-e2e/VirtualFunctions/misc/range-non-uniform-vf-2.cpp @@ -44,6 +44,25 @@ class OpB : public BaseOp { virtual int bar(int V) { return V / 2; } }; +template struct KernelFunctor { + T1 mDeviceStorage; + T2 mDataAcc; + KernelFunctor(T1 DeviceStorage, T2 DataAcc) + : mDeviceStorage(DeviceStorage), mDataAcc(DataAcc) {} + + template void operator()(T It) const { + // Select method that corresponds to this work-item + auto *Ptr = mDeviceStorage->template getAs(); + if (It % 2) + mDataAcc[It] = Ptr->foo(mDataAcc[It]); + else + mDataAcc[It] = Ptr->bar(mDataAcc[It]); + } + auto get(oneapi::properties_tag) const { + return oneapi::properties{oneapi::assume_indirect_calls}; + } +}; + int main() try { using storage_t = obj_storage_t; @@ -54,7 +73,6 @@ int main() try { auto *DeviceStorage = sycl::malloc_shared(1, q); sycl::range R{1024}; - constexpr oneapi::properties props{oneapi::assume_indirect_calls}; for (size_t TestCase = 0; TestCase < 2; ++TestCase) { std::vector HostData(R.size()); std::iota(HostData.begin(), HostData.end(), 0); @@ -69,14 +87,7 @@ int main() try { q.submit([&](sycl::handler &CGH) { sycl::accessor DataAcc(DataStorage, CGH, sycl::read_write); - CGH.parallel_for(R, props, [=](auto It) { - // Select method that corresponds to this work-item - auto *Ptr = DeviceStorage->template getAs(); - if (It % 2) - DataAcc[It] = Ptr->foo(DataAcc[It]); - else - DataAcc[It] = Ptr->bar(DataAcc[It]); - }); + CGH.parallel_for(R, KernelFunctor(DeviceStorage, DataAcc)); }); BaseOp *Ptr = HostStorage.construct(TestCase); diff --git a/sycl/test-e2e/VirtualFunctions/misc/range-non-uniform-vf.cpp b/sycl/test-e2e/VirtualFunctions/misc/range-non-uniform-vf.cpp index 453a3aee81fa6..56b233dbff8cb 100644 --- a/sycl/test-e2e/VirtualFunctions/misc/range-non-uniform-vf.cpp +++ b/sycl/test-e2e/VirtualFunctions/misc/range-non-uniform-vf.cpp @@ -41,6 +41,23 @@ class RoundOp : public BaseOp { virtual float apply(float V) { return sycl::round(V); } }; +template struct KernelFunctor { + T1 mDeviceStorage; + T2 mDataAcc; + KernelFunctor(T1 DeviceStorage, T2 DataAcc) + : mDeviceStorage(DeviceStorage), mDataAcc(DataAcc) {} + + void operator()(sycl::item<1> It) const { + // Select an object that corresponds to this work-item + auto Ind = It % 3; + auto *Ptr = mDeviceStorage[Ind].template getAs(); + mDataAcc[It] = Ptr->apply(mDataAcc[It]); + } + auto get(oneapi::properties_tag) const { + return oneapi::properties{oneapi::assume_indirect_calls}; + } +}; + int main() try { using storage_t = obj_storage_t; @@ -51,7 +68,6 @@ int main() try { auto *DeviceStorage = sycl::malloc_shared(3, q); sycl::range R{1024}; - constexpr oneapi::properties props{oneapi::assume_indirect_calls}; { std::vector HostData(R.size()); for (size_t I = 1; I < HostData.size(); ++I) @@ -69,12 +85,7 @@ int main() try { q.submit([&](sycl::handler &CGH) { sycl::accessor DataAcc(DataStorage, CGH, sycl::read_write); - CGH.parallel_for(R, props, [=](auto it) { - // Select an object that corresponds to this work-item - auto Ind = it % 3; - auto *Ptr = DeviceStorage[Ind].template getAs(); - DataAcc[it] = Ptr->apply(DataAcc[it]); - }); + CGH.parallel_for(R, KernelFunctor(DeviceStorage, DataAcc)); }); BaseOp *Ptr[] = {HostStorage[0].construct(0), diff --git a/sycl/test-e2e/VirtualFunctions/misc/range-uniform-vf.cpp b/sycl/test-e2e/VirtualFunctions/misc/range-uniform-vf.cpp index 66db6a0c5af7a..f624dcb26d66a 100644 --- a/sycl/test-e2e/VirtualFunctions/misc/range-uniform-vf.cpp +++ b/sycl/test-e2e/VirtualFunctions/misc/range-uniform-vf.cpp @@ -41,6 +41,21 @@ class RoundOp : public BaseOp { virtual float apply(float V) { return sycl::round(V); } }; +template struct KernelFunctor { + T1 mDeviceStorage; + T2 mDataAcc; + KernelFunctor(T1 DeviceStorage, T2 DataAcc) + : mDeviceStorage(DeviceStorage), mDataAcc(DataAcc) {} + + void operator()(sycl::id<1> It) const { + auto *Ptr = mDeviceStorage->template getAs(); + mDataAcc[It] = Ptr->apply(mDataAcc[It]); + } + auto get(oneapi::properties_tag) const { + return oneapi::properties{oneapi::assume_indirect_calls}; + } +}; + int main() try { using storage_t = obj_storage_t; @@ -51,7 +66,6 @@ int main() try { auto *DeviceStorage = sycl::malloc_shared(1, q); sycl::range R{1024}; - constexpr oneapi::properties props{oneapi::assume_indirect_calls}; for (unsigned TestCase = 0; TestCase < 3; ++TestCase) { std::vector HostData(R.size()); for (size_t I = 1; I < HostData.size(); ++I) @@ -67,10 +81,7 @@ int main() try { q.submit([&](sycl::handler &CGH) { sycl::accessor DataAcc(DataStorage, CGH, sycl::read_write); - CGH.parallel_for(R, props, [=](auto it) { - auto *Ptr = DeviceStorage->getAs(); - DataAcc[it] = Ptr->apply(DataAcc[it]); - }); + CGH.parallel_for(R, KernelFunctor(DeviceStorage, DataAcc)); }); auto *Ptr = HostStorage.construct(TestCase); diff --git a/sycl/test-e2e/VirtualFunctions/multiple-translation-units/Inputs/call.cpp b/sycl/test-e2e/VirtualFunctions/multiple-translation-units/Inputs/call.cpp index 9ce59931405d6..4ea4e7cf125b7 100644 --- a/sycl/test-e2e/VirtualFunctions/multiple-translation-units/Inputs/call.cpp +++ b/sycl/test-e2e/VirtualFunctions/multiple-translation-units/Inputs/call.cpp @@ -1,17 +1,28 @@ #include "declarations.hpp" +template struct KernelFunctor { + T1 mDeviceStorage; + T2 mDataAcc; + KernelFunctor(T1 &DeviceStorage, T2 &DataAcc) + : mDeviceStorage(DeviceStorage), mDataAcc(DataAcc) {} + + void operator()() const { + auto *Ptr = mDeviceStorage->template getAs(); + Ptr->increment( + mDataAcc.template get_multi_ptr().get()); + } + auto get(oneapi::properties_tag) const { + return oneapi::properties{oneapi::assume_indirect_calls}; + } +}; + int call(sycl::queue Q, storage_t *DeviceStorage, int Init) { int Data = Init; { sycl::buffer DataStorage(&Data, sycl::range{1}); - constexpr oneapi::properties props{oneapi::assume_indirect_calls}; Q.submit([&](sycl::handler &CGH) { sycl::accessor DataAcc(DataStorage, CGH, sycl::write_only); - CGH.single_task(props, [=]() { - auto *Ptr = DeviceStorage->getAs(); - Ptr->increment( - DataAcc.get_multi_ptr().get()); - }); + CGH.single_task(KernelFunctor(DeviceStorage, DataAcc)); }); } diff --git a/sycl/test-e2e/WorkGroupScratchMemory/copy_dynamic_size.cpp b/sycl/test-e2e/WorkGroupScratchMemory/copy_dynamic_size.cpp index 1f61653efc44e..e1716cff85c67 100644 --- a/sycl/test-e2e/WorkGroupScratchMemory/copy_dynamic_size.cpp +++ b/sycl/test-e2e/WorkGroupScratchMemory/copy_dynamic_size.cpp @@ -29,6 +29,17 @@ void copy_via_smem(DataType *a, DataType *b, sycl::nd_item<1> it) { b[threadIdx_x] = smem_ptr[threadIdx_x]; } +template struct KernelFunctor { + T m_props; + DataType *m_a; + DataType *m_b; + KernelFunctor(T props, DataType *a, DataType *b) + : m_props(props), m_a(a), m_b(b) {} + + void operator()(sycl::nd_item<1> it) const { copy_via_smem(m_a, m_b, it); } + auto get(sycl_ext::properties_tag) const { return m_props; } +}; + int main() { sycl::queue queue; DataType *a = sycl::malloc_device(Size, queue); @@ -40,10 +51,12 @@ int main() { queue .submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::nd_range<1>({Size}, {Size}), - sycl_ext::properties{sycl_ext::work_group_scratch_size( - Size * sizeof(DataType))}, - [=](sycl::nd_item<1> it) { copy_via_smem(a, b, it); }); + cgh.parallel_for( + sycl::nd_range<1>({Size}, {Size}), + KernelFunctor( + sycl_ext::properties{ + sycl_ext::work_group_scratch_size(Size * sizeof(DataType))}, + a, b)); }) .wait_and_throw(); diff --git a/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_local_accessor.cpp b/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_local_accessor.cpp index 04d8a85a808ff..ebcc17855cbf1 100644 --- a/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_local_accessor.cpp +++ b/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_local_accessor.cpp @@ -23,6 +23,41 @@ using namespace sycl; namespace sycl_ext = sycl::ext::oneapi::experimental; +template struct KernelFunctor { + T1 m_props; + T2 mLocalAccessor; + T3 mAcc; + KernelFunctor(T1 props, T2 LocalAccessor, T3 Acc) + : m_props(props), mLocalAccessor(LocalAccessor), mAcc(Acc) {} + + void operator()(nd_item<1> Item) const { + int *Ptr = + reinterpret_cast(sycl_ext::get_work_group_scratch_memory()); + size_t GroupOffset = Item.get_group_linear_id() * ElemPerWG; + for (size_t I = 0; I < RepeatWG; ++I) { + Ptr[WgSize * I + Item.get_local_linear_id()] = Item.get_local_linear_id(); + } + Item.barrier(); + + for (size_t I = 0; I < RepeatWG; ++I) { + // Check that the local accessor works. + size_t LocalIdx = Item.get_local_linear_id() ^ 1; + mLocalAccessor[WgSize * I + LocalIdx] = Ptr[WgSize * I + LocalIdx] + 1; + } + Item.barrier(); + + for (size_t I = 0; I < RepeatWG; ++I) { + // Check that the memory is accessible from other + // work-items + size_t BaseIdx = GroupOffset + (I * WgSize); + size_t LocalIdx = Item.get_local_linear_id(); + size_t GlobalIdx = BaseIdx + LocalIdx; + mAcc[GlobalIdx] = mLocalAccessor[WgSize * I + LocalIdx]; + } + } + auto get(sycl_ext::properties_tag) const { return m_props; } +}; + int main() { queue Q; std::vector Vec(Size, 0); @@ -36,34 +71,7 @@ int main() { auto LocalAccessor = sycl::local_accessor(WgSize * RepeatWG * sizeof(int), Cgh); Cgh.parallel_for(nd_range<1>(range<1>(WgSize * WgCount), range<1>(WgSize)), - properties, [=](nd_item<1> Item) { - int *Ptr = reinterpret_cast( - sycl_ext::get_work_group_scratch_memory()); - size_t GroupOffset = - Item.get_group_linear_id() * ElemPerWG; - for (size_t I = 0; I < RepeatWG; ++I) { - Ptr[WgSize * I + Item.get_local_linear_id()] = - Item.get_local_linear_id(); - } - Item.barrier(); - - for (size_t I = 0; I < RepeatWG; ++I) { - // Check that the local accessor works. - size_t LocalIdx = Item.get_local_linear_id() ^ 1; - LocalAccessor[WgSize * I + LocalIdx] = - Ptr[WgSize * I + LocalIdx] + 1; - } - Item.barrier(); - - for (size_t I = 0; I < RepeatWG; ++I) { - // Check that the memory is accessible from other - // work-items - size_t BaseIdx = GroupOffset + (I * WgSize); - size_t LocalIdx = Item.get_local_linear_id(); - size_t GlobalIdx = BaseIdx + LocalIdx; - Acc[GlobalIdx] = LocalAccessor[WgSize * I + LocalIdx]; - } - }); + KernelFunctor(properties, LocalAccessor, Acc)); }); host_accessor Acc(Buf, read_only); diff --git a/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_ptr_alias.cpp b/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_ptr_alias.cpp index 46346d5f2ee85..2aba3369ada2f 100644 --- a/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_ptr_alias.cpp +++ b/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_ptr_alias.cpp @@ -23,6 +23,36 @@ using namespace sycl; namespace sycl_ext = sycl::ext::oneapi::experimental; +template struct KernelFunctor { + T1 m_props; + T2 mAcc; + KernelFunctor(T1 props, T2 Acc) : m_props(props), mAcc(Acc) {} + + void operator()(nd_item<1> Item) const { + int *Ptr = + reinterpret_cast(sycl_ext::get_work_group_scratch_memory()); + size_t GroupOffset = Item.get_group_linear_id() * ElemPerWG; + for (size_t I = 0; I < RepeatWG; ++I) { + Ptr[WgSize * I + Item.get_local_linear_id()] = Item.get_local_linear_id(); + } + + Item.barrier(); + // Check that multiple calls return the same pointer. + unsigned int *PtrAlias = reinterpret_cast( + sycl_ext::get_work_group_scratch_memory()); + + for (size_t I = 0; I < RepeatWG; ++I) { + // Check that the memory is accessible from other + // work-items + size_t BaseIdx = GroupOffset + (I * WgSize); + size_t LocalIdx = Item.get_local_linear_id() ^ 1; + size_t GlobalIdx = BaseIdx + LocalIdx; + mAcc[GlobalIdx] = PtrAlias[WgSize * I + LocalIdx]; + } + } + auto get(sycl_ext::properties_tag) const { return m_props; } +}; + int main() { queue Q; std::vector Vec(Size, 0); @@ -34,31 +64,7 @@ int main() { sizeof(int)); sycl_ext::properties properties{static_size}; Cgh.parallel_for(nd_range<1>(range<1>(WgSize * WgCount), range<1>(WgSize)), - properties, [=](nd_item<1> Item) { - int *Ptr = reinterpret_cast( - sycl_ext::get_work_group_scratch_memory()); - size_t GroupOffset = - Item.get_group_linear_id() * ElemPerWG; - for (size_t I = 0; I < RepeatWG; ++I) { - Ptr[WgSize * I + Item.get_local_linear_id()] = - Item.get_local_linear_id(); - } - - Item.barrier(); - // Check that multiple calls return the same pointer. - unsigned int *PtrAlias = - reinterpret_cast( - sycl_ext::get_work_group_scratch_memory()); - - for (size_t I = 0; I < RepeatWG; ++I) { - // Check that the memory is accessible from other - // work-items - size_t BaseIdx = GroupOffset + (I * WgSize); - size_t LocalIdx = Item.get_local_linear_id() ^ 1; - size_t GlobalIdx = BaseIdx + LocalIdx; - Acc[GlobalIdx] = PtrAlias[WgSize * I + LocalIdx]; - } - }); + KernelFunctor(properties, Acc)); }); host_accessor Acc(Buf, read_only); diff --git a/sycl/test-e2e/WorkGroupScratchMemory/dynamic_allocation.cpp b/sycl/test-e2e/WorkGroupScratchMemory/dynamic_allocation.cpp index 224bf2607f772..bf61ddd51a4b3 100644 --- a/sycl/test-e2e/WorkGroupScratchMemory/dynamic_allocation.cpp +++ b/sycl/test-e2e/WorkGroupScratchMemory/dynamic_allocation.cpp @@ -22,6 +22,32 @@ using namespace sycl; namespace sycl_ext = sycl::ext::oneapi::experimental; +template struct KernelFunctor { + T1 m_props; + T2 mAcc; + KernelFunctor(T1 props, T2 Acc) : m_props(props), mAcc(Acc) {} + + void operator()(nd_item<1> Item) const { + int *Ptr = + reinterpret_cast(sycl_ext::get_work_group_scratch_memory()); + size_t GroupOffset = Item.get_group_linear_id() * ElemPerWG; + for (size_t I = 0; I < RepeatWG; ++I) { + Ptr[WgSize * I + Item.get_local_linear_id()] = Item.get_local_linear_id(); + } + + Item.barrier(); + for (size_t I = 0; I < RepeatWG; ++I) { + // Check that the memory is accessible from other + // work-items + size_t BaseIdx = GroupOffset + (I * WgSize); + size_t LocalIdx = Item.get_local_linear_id() ^ 1; + size_t GlobalIdx = BaseIdx + LocalIdx; + mAcc[GlobalIdx] = Ptr[WgSize * I + LocalIdx]; + } + } + auto get(sycl_ext::properties_tag) const { return m_props; } +}; + int main() { queue Q; std::vector Vec(Size, 0); @@ -33,26 +59,7 @@ int main() { sizeof(int)); sycl_ext::properties properties{static_size}; Cgh.parallel_for(nd_range<1>(range<1>(WgSize * WgCount), range<1>(WgSize)), - properties, [=](nd_item<1> Item) { - int *Ptr = reinterpret_cast( - sycl_ext::get_work_group_scratch_memory()); - size_t GroupOffset = - Item.get_group_linear_id() * ElemPerWG; - for (size_t I = 0; I < RepeatWG; ++I) { - Ptr[WgSize * I + Item.get_local_linear_id()] = - Item.get_local_linear_id(); - } - - Item.barrier(); - for (size_t I = 0; I < RepeatWG; ++I) { - // Check that the memory is accessible from other - // work-items - size_t BaseIdx = GroupOffset + (I * WgSize); - size_t LocalIdx = Item.get_local_linear_id() ^ 1; - size_t GlobalIdx = BaseIdx + LocalIdx; - Acc[GlobalIdx] = Ptr[WgSize * I + LocalIdx]; - } - }); + KernelFunctor(properties, Acc)); }); host_accessor Acc(Buf, read_only); diff --git a/sycl/test-e2e/WorkGroupScratchMemory/dynamic_unused.cpp b/sycl/test-e2e/WorkGroupScratchMemory/dynamic_unused.cpp index e427305c18ed3..6608eed567633 100644 --- a/sycl/test-e2e/WorkGroupScratchMemory/dynamic_unused.cpp +++ b/sycl/test-e2e/WorkGroupScratchMemory/dynamic_unused.cpp @@ -14,6 +14,19 @@ using DataType = int; namespace sycl_ext = sycl::ext::oneapi::experimental; +template struct KernelFunctor { + T m_props; + DataType *m_a; + DataType *m_b; + KernelFunctor(T props, DataType *a, DataType *b) + : m_props(props), m_a(a), m_b(b) {} + + void operator()(sycl::nd_item<1> it) const { + m_b[it.get_local_linear_id()] = m_a[it.get_local_linear_id()]; + } + auto get(sycl_ext::properties_tag) const { return m_props; } +}; + int main() { sycl::queue queue; DataType *a = sycl::malloc_device(Size, queue); @@ -25,13 +38,12 @@ int main() { queue .submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::nd_range<1>({Size}, {Size}), - sycl_ext::properties{sycl_ext::work_group_scratch_size( - Size * sizeof(DataType))}, - [=](sycl::nd_item<1> it) { - b[it.get_local_linear_id()] = - a[it.get_local_linear_id()]; - }); + cgh.parallel_for( + sycl::nd_range<1>({Size}, {Size}), + KernelFunctor( + sycl_ext::properties{ + sycl_ext::work_group_scratch_size(Size * sizeof(DataType))}, + a, b)); }) .wait_and_throw(); diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_device_has.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_device_has.cpp index 055b25b920b8b..3d1c528744afd 100644 --- a/sycl/test/check_device_code/extensions/properties/properties_kernel_device_has.cpp +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_device_has.cpp @@ -1,5 +1,8 @@ -// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm -Xclang -disable-llvm-passes %s -o - | FileCheck %s --check-prefix CHECK-IR -// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s +// TODO: Currently using the -Wno-deprecated-declarations flag due to issue +// https://github.com/intel/llvm/issues/16320. Remove the flag once the issue is +// resolved. +// RUN: %clangxx -fsycl-device-only -S -Wno-deprecated-declarations -Xclang -emit-llvm -Xclang -disable-llvm-passes %s -o - | FileCheck %s --check-prefix CHECK-IR +// RUN: %clangxx -fsycl -fsyntax-only -Wno-deprecated-declarations -Xclang -verify %s // expected-no-diagnostics #include diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_sub_group_size.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_sub_group_size.cpp index a179c134749e9..ad81d1db1fe0b 100644 --- a/sycl/test/check_device_code/extensions/properties/properties_kernel_sub_group_size.cpp +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_sub_group_size.cpp @@ -1,5 +1,8 @@ -// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR -// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s +// TODO: Currently using the -Wno-deprecated-declarations flag due to issue +// https://github.com/intel/llvm/issues/16320. Remove the flag once the issue is +// resolved. +// RUN: %clangxx -fsycl-device-only -S -Wno-deprecated-declarations -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR +// RUN: %clangxx -fsycl -fsyntax-only -Wno-deprecated-declarations -Xclang -verify %s // expected-no-diagnostics #include diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size.cpp index 932b92fab9009..63280fcc638f3 100644 --- a/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size.cpp +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size.cpp @@ -1,5 +1,8 @@ -// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR -// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s +// TODO: Currently using the -Wno-deprecated-declarations flag due to issue +// https://github.com/intel/llvm/issues/16320. Remove the flag once the issue is +// resolved. +// RUN: %clangxx -fsycl-device-only -S -Wno-deprecated-declarations -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR +// RUN: %clangxx -fsycl -fsyntax-only -Wno-deprecated-declarations -Xclang -verify %s // expected-no-diagnostics #include diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size_hint.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size_hint.cpp index a844b484b8b51..a0bae31ad8004 100644 --- a/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size_hint.cpp +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_work_group_size_hint.cpp @@ -1,5 +1,8 @@ -// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR -// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s +// TODO: Currently using the -Wno-deprecated-declarations flag due to issue +// https://github.com/intel/llvm/issues/16320. Remove the flag once the issue is +// resolved. +// RUN: %clangxx -fsycl-device-only -S -Wno-deprecated-declarations -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR +// RUN: %clangxx -fsycl -fsyntax-only -Wno-deprecated-declarations -Xclang -verify %s // expected-no-diagnostics #include diff --git a/sycl/test/warnings/deprecated_single_task_parallel_for_with_props.cpp b/sycl/test/warnings/deprecated_single_task_parallel_for_with_props.cpp new file mode 100644 index 0000000000000..e4bbf16091808 --- /dev/null +++ b/sycl/test/warnings/deprecated_single_task_parallel_for_with_props.cpp @@ -0,0 +1,65 @@ +// Ignore unexpected warnings because for some reason the warnings are emitted +// twice, e.g. once for `single_task`, then for `single_task>>`. +// RUN: %clangxx -fsycl -sycl-std=2020 -fsycl-device-only -Xclang -verify -Xclang -verify-ignore-unexpected=warning -Xclang -verify-ignore-unexpected=note %s -fsyntax-only -Wall -Wextra +#include + +using namespace sycl; +int main() { + queue Q; + event Ev; + range<1> R1{1}; + range<2> R2(1, 1); + range<3> R3(1, 1, 1); + nd_range<1> NDR1{R1, R1}; + constexpr auto Props = sycl::ext::oneapi::experimental::properties{}; + + // expected-warning@+1{{'single_task' is deprecated: Use sycl::ext::oneapi::experimental::single_task (provided in the sycl_ext_oneapi_enqueue_functions extension) instead.}} + Q.single_task(Props, []() {}); + // expected-warning@+1{{'single_task' is deprecated: Use sycl::ext::oneapi::experimental::single_task (provided in the sycl_ext_oneapi_enqueue_functions extension) instead.}} + Q.single_task(Ev, Props, []() {}); + // expected-warning@+1{{'single_task' is deprecated: Use sycl::ext::oneapi::experimental::single_task (provided in the sycl_ext_oneapi_enqueue_functions extension) instead.}} + Q.single_task({Ev}, Props, []() {}); + + // expected-warning@+1{{'parallel_for' is deprecated: Use sycl::ext::oneapi::experimental::parallel_for (provided in the sycl_ext_oneapi_enqueue_functions extension) instead.}} + Q.parallel_for(NDR1, Props, [](nd_item<1>) {}); + + // expected-warning@+2{{'single_task' is deprecated: Use sycl::ext::oneapi::experimental::single_task (provided in the sycl_ext_oneapi_enqueue_functions extension) instead.}} + Q.submit([&](handler &CGH) { + CGH.single_task(Props, []() {}); + }); + + // expected-warning@+2{{'parallel_for_work_group' is deprecated: Use sycl::ext::oneapi::experimental::parallel_for (provided in the sycl_ext_oneapi_enqueue_functions extension) instead.}} + Q.submit([&](handler &CGH) { + CGH.parallel_for_work_group(R1, Props, + [](sycl::group<1>) {}); + }); + // expected-warning@+2{{'parallel_for_work_group' is deprecated: Use sycl::ext::oneapi::experimental::parallel_for (provided in the sycl_ext_oneapi_enqueue_functions extension) instead.}} + Q.submit([&](handler &CGH) { + CGH.parallel_for_work_group(R2, Props, + [](sycl::group<2>) {}); + }); + // expected-warning@+2{{'parallel_for_work_group' is deprecated: Use sycl::ext::oneapi::experimental::parallel_for (provided in the sycl_ext_oneapi_enqueue_functions extension) instead.}} + Q.submit([&](handler &CGH) { + CGH.parallel_for_work_group(R3, Props, + [](sycl::group<3>) {}); + }); + + // expected-warning@+2{{'parallel_for_work_group' is deprecated: Use sycl::ext::oneapi::experimental::parallel_for (provided in the sycl_ext_oneapi_enqueue_functions extension) instead.}} + Q.submit([&](handler &CGH) { + CGH.parallel_for_work_group(R1, R1, Props, + [](sycl::group<1>) {}); + }); + // expected-warning@+2{{'parallel_for_work_group' is deprecated: Use sycl::ext::oneapi::experimental::parallel_for (provided in the sycl_ext_oneapi_enqueue_functions extension) instead.}} + Q.submit([&](handler &CGH) { + CGH.parallel_for_work_group(R2, R2, Props, + [](sycl::group<2>) {}); + }); + // expected-warning@+2{{'parallel_for_work_group' is deprecated: Use sycl::ext::oneapi::experimental::parallel_for (provided in the sycl_ext_oneapi_enqueue_functions extension) instead.}} + Q.submit([&](handler &CGH) { + CGH.parallel_for_work_group(R3, R3, Props, + [](sycl::group<3>) {}); + }); + return 0; +}