diff --git a/sycl/include/sycl/ext/oneapi/memcpy2d.hpp b/sycl/include/sycl/ext/oneapi/memcpy2d.hpp index 3e29c20c390d9..cfbe0a36ab0b4 100644 --- a/sycl/include/sycl/ext/oneapi/memcpy2d.hpp +++ b/sycl/include/sycl/ext/oneapi/memcpy2d.hpp @@ -18,6 +18,7 @@ template void handler::ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height) { +#ifndef __SYCL_DEVICE_ONLY__ throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_memcpy2d>(); @@ -30,6 +31,7 @@ void handler::ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, throw sycl::exception(sycl::make_error_code(errc::invalid), "Source pitch must be greater than or equal " "to the width specified in 'ext_oneapi_memcpy2d'"); +#endif // Get the type of the pointers. context Ctx = detail::createSyclObjFromImpl(getContextImplPtr()); diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 6aff9b978cf66..f13b7e3835492 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -453,30 +453,6 @@ class __SYCL_EXPORT handler { "a single kernel or explicit memory operation."); } - constexpr static int AccessTargetMask = 0x7ff; - /// According to section 4.7.6.11. of the SYCL specification, a local accessor - /// must not be used in a SYCL kernel function that is invoked via single_task - /// or via the simple form of parallel_for that takes a range parameter. - template - void throwOnLocalAccessorMisuse() const { - using NameT = - typename detail::get_kernel_name_t::name; - for (unsigned I = 0; I < detail::getKernelNumParams(); ++I) { - const detail::kernel_param_desc_t ParamDesc = - detail::getKernelParamDesc(I); - const detail::kernel_param_kind_t &Kind = ParamDesc.kind; - const access::target AccTarget = - static_cast(ParamDesc.info & AccessTargetMask); - if ((Kind == detail::kernel_param_kind_t::kind_accessor) && - (AccTarget == target::local)) - throw sycl::exception( - make_error_code(errc::kernel_argument), - "A local accessor must not be used in a SYCL kernel function " - "that is invoked via single_task or via the simple form of " - "parallel_for that takes a range parameter."); - } - } - /// Extracts and prepares kernel arguments from the lambda using information /// from the built-ins or integration header. void extractArgsAndReqsFromLambda( @@ -1118,12 +1094,14 @@ class __SYCL_EXPORT handler { typename PropertiesT = ext::oneapi::experimental::empty_properties_t> void parallel_for_lambda_impl(range UserRange, PropertiesT Props, KernelType KernelFunc) { +#ifndef __SYCL_DEVICE_ONLY__ throwIfActionIsCreated(); throwOnLocalAccessorMisuse(); if (!range_size_fits_in_size_t(UserRange)) throw sycl::exception(make_error_code(errc::runtime), "The total number of work-items in " "a range must fit within size_t"); +#endif using LambdaArgType = sycl::detail::lambda_arg_type>; @@ -1235,7 +1213,6 @@ class __SYCL_EXPORT handler { typename PropertiesT> void parallel_for_impl(nd_range ExecutionRange, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) { - throwIfActionIsCreated(); // TODO: Properties may change the kernel function, so in order to avoid // conflicts they should be included in the name. using NameT = @@ -1253,6 +1230,7 @@ class __SYCL_EXPORT handler { kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ + throwIfActionIsCreated(); verifyUsedKernelBundleInternal( detail::string_view{detail::getKernelName()}); detail::checkValueRange(ExecutionRange); @@ -1332,7 +1310,6 @@ class __SYCL_EXPORT handler { void parallel_for_work_group_lambda_impl(range NumWorkGroups, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) { - throwIfActionIsCreated(); // TODO: Properties may change the kernel function, so in order to avoid // conflicts they should be included in the name. using NameT = @@ -1344,6 +1321,7 @@ class __SYCL_EXPORT handler { kernel_parallel_for_work_group_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ + throwIfActionIsCreated(); verifyUsedKernelBundleInternal( detail::string_view{detail::getKernelName()}); processProperties(), PropertiesT>(Props); @@ -1374,7 +1352,6 @@ class __SYCL_EXPORT handler { range WorkGroupSize, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) { - throwIfActionIsCreated(); // TODO: Properties may change the kernel function, so in order to avoid // conflicts they should be included in the name. using NameT = @@ -1387,6 +1364,7 @@ class __SYCL_EXPORT handler { kernel_parallel_for_work_group_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ + throwIfActionIsCreated(); verifyUsedKernelBundleInternal( detail::string_view{detail::getKernelName()}); processProperties(), PropertiesT>(Props); @@ -1655,8 +1633,6 @@ class __SYCL_EXPORT handler { void single_task_lambda_impl(PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) { (void)Props; - throwIfActionIsCreated(); - throwOnLocalAccessorMisuse(); // TODO: Properties may change the kernel function, so in order to avoid // conflicts they should be included in the name. using NameT = @@ -1664,6 +1640,8 @@ class __SYCL_EXPORT handler { kernel_single_task_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ + throwIfActionIsCreated(); + throwOnLocalAccessorMisuse(); verifyUsedKernelBundleInternal( detail::string_view{detail::getKernelName()}); // No need to check if range is out of INT_MAX limits as it's compile-time @@ -1931,11 +1909,13 @@ class __SYCL_EXPORT handler { template std::enable_if_t, void(interop_handle)>::value> - ext_codeplay_enqueue_native_command(FuncT &&Func) { + ext_codeplay_enqueue_native_command([[maybe_unused]] FuncT &&Func) { +#ifndef __SYCL_DEVICE_ONLY__ throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_codeplay_enqueue_native_command>(); ext_codeplay_enqueue_native_command_impl(Func); +#endif } /// Defines and invokes a SYCL kernel function for the specified range and @@ -1956,7 +1936,6 @@ class __SYCL_EXPORT handler { __SYCL2020_DEPRECATED("offsets are deprecated in SYCL2020") void parallel_for(range NumWorkItems, id WorkItemOffset, _KERNELFUNCPARAM(KernelFunc)) { - throwIfActionIsCreated(); using NameT = typename detail::get_kernel_name_t::name; using LambdaArgType = sycl::detail::lambda_arg_type>; @@ -1967,6 +1946,7 @@ class __SYCL_EXPORT handler { (void)WorkItemOffset; kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ + throwIfActionIsCreated(); verifyUsedKernelBundleInternal( detail::string_view{detail::getKernelName()}); detail::checkValueRange(NumWorkItems, WorkItemOffset); @@ -2099,7 +2079,6 @@ class __SYCL_EXPORT handler { /// is a host device. template void single_task(kernel Kernel, _KERNELFUNCPARAM(KernelFunc)) { - throwIfActionIsCreated(); // Ignore any set kernel bundles and use the one associated with the kernel setHandlerKernelBundle(Kernel); using NameT = @@ -2107,6 +2086,7 @@ class __SYCL_EXPORT handler { (void)Kernel; kernel_single_task(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ + throwIfActionIsCreated(); verifyUsedKernelBundleInternal( detail::string_view{detail::getKernelName()}); // No need to check if range is out of INT_MAX limits as it's compile-time @@ -2135,7 +2115,6 @@ class __SYCL_EXPORT handler { int Dims> void parallel_for(kernel Kernel, range NumWorkItems, _KERNELFUNCPARAM(KernelFunc)) { - throwIfActionIsCreated(); // Ignore any set kernel bundles and use the one associated with the kernel setHandlerKernelBundle(Kernel); using NameT = @@ -2145,6 +2124,7 @@ class __SYCL_EXPORT handler { (void)NumWorkItems; kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ + throwIfActionIsCreated(); verifyUsedKernelBundleInternal( detail::string_view{detail::getKernelName()}); detail::checkValueRange(NumWorkItems); @@ -2175,9 +2155,6 @@ class __SYCL_EXPORT handler { __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020") void parallel_for(kernel Kernel, range NumWorkItems, id WorkItemOffset, _KERNELFUNCPARAM(KernelFunc)) { - throwIfActionIsCreated(); - // Ignore any set kernel bundles and use the one associated with the kernel - setHandlerKernelBundle(Kernel); using NameT = typename detail::get_kernel_name_t::name; using LambdaArgType = sycl::detail::lambda_arg_type>; @@ -2186,6 +2163,9 @@ class __SYCL_EXPORT handler { (void)WorkItemOffset; kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ + throwIfActionIsCreated(); + // Ignore any set kernel bundles and use the one associated with the kernel + setHandlerKernelBundle(Kernel); verifyUsedKernelBundleInternal( detail::string_view{detail::getKernelName()}); detail::checkValueRange(NumWorkItems, WorkItemOffset); @@ -2215,9 +2195,6 @@ class __SYCL_EXPORT handler { int Dims> void parallel_for(kernel Kernel, nd_range NDRange, _KERNELFUNCPARAM(KernelFunc)) { - throwIfActionIsCreated(); - // Ignore any set kernel bundles and use the one associated with the kernel - setHandlerKernelBundle(Kernel); using NameT = typename detail::get_kernel_name_t::name; using LambdaArgType = @@ -2226,6 +2203,9 @@ class __SYCL_EXPORT handler { (void)NDRange; kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ + throwIfActionIsCreated(); + // Ignore any set kernel bundles and use the one associated with the kernel + setHandlerKernelBundle(Kernel); verifyUsedKernelBundleInternal( detail::string_view{detail::getKernelName()}); detail::checkValueRange(NDRange); @@ -2259,9 +2239,6 @@ class __SYCL_EXPORT handler { int Dims> void parallel_for_work_group(kernel Kernel, range NumWorkGroups, _KERNELFUNCPARAM(KernelFunc)) { - throwIfActionIsCreated(); - // Ignore any set kernel bundles and use the one associated with the kernel - setHandlerKernelBundle(Kernel); using NameT = typename detail::get_kernel_name_t::name; using LambdaArgType = @@ -2270,6 +2247,9 @@ class __SYCL_EXPORT handler { (void)NumWorkGroups; kernel_parallel_for_work_group_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ + throwIfActionIsCreated(); + // Ignore any set kernel bundles and use the one associated with the kernel + setHandlerKernelBundle(Kernel); verifyUsedKernelBundleInternal( detail::string_view{detail::getKernelName()}); detail::checkValueRange(NumWorkGroups); @@ -2300,9 +2280,6 @@ class __SYCL_EXPORT handler { void parallel_for_work_group(kernel Kernel, range NumWorkGroups, range WorkGroupSize, _KERNELFUNCPARAM(KernelFunc)) { - throwIfActionIsCreated(); - // Ignore any set kernel bundles and use the one associated with the kernel - setHandlerKernelBundle(Kernel); using NameT = typename detail::get_kernel_name_t::name; using LambdaArgType = @@ -2312,6 +2289,9 @@ class __SYCL_EXPORT handler { (void)WorkGroupSize; kernel_parallel_for_work_group_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ + throwIfActionIsCreated(); + // Ignore any set kernel bundles and use the one associated with the kernel + setHandlerKernelBundle(Kernel); verifyUsedKernelBundleInternal( detail::string_view{detail::getKernelName()}); nd_range ExecRange = @@ -2381,8 +2361,10 @@ class __SYCL_EXPORT handler { detail::AreAllButLastReductions::value && ext::oneapi::experimental::is_property_list::value> parallel_for(range<1> Range, PropertiesT Properties, RestT &&...Rest) { +#ifndef __SYCL_DEVICE_ONLY__ throwIfGraphAssociated(); +#endif detail::reduction_parallel_for(*this, Range, Properties, std::forward(Rest)...); } @@ -2394,8 +2376,10 @@ class __SYCL_EXPORT handler { detail::AreAllButLastReductions::value && ext::oneapi::experimental::is_property_list::value> parallel_for(range<2> Range, PropertiesT Properties, RestT &&...Rest) { +#ifndef __SYCL_DEVICE_ONLY__ throwIfGraphAssociated(); +#endif detail::reduction_parallel_for(*this, Range, Properties, std::forward(Rest)...); } @@ -2407,8 +2391,10 @@ class __SYCL_EXPORT handler { detail::AreAllButLastReductions::value && ext::oneapi::experimental::is_property_list::value> parallel_for(range<3> Range, PropertiesT Properties, RestT &&...Rest) { +#ifndef __SYCL_DEVICE_ONLY__ throwIfGraphAssociated(); +#endif detail::reduction_parallel_for(*this, Range, Properties, std::forward(Rest)...); } @@ -2444,8 +2430,10 @@ class __SYCL_EXPORT handler { detail::AreAllButLastReductions::value && ext::oneapi::experimental::is_property_list::value> parallel_for(nd_range Range, PropertiesT Properties, RestT &&...Rest) { +#ifndef __SYCL_DEVICE_ONLY__ throwIfGraphAssociated(); +#endif detail::reduction_parallel_for(*this, Range, Properties, std::forward(Rest)...); } @@ -2497,8 +2485,10 @@ class __SYCL_EXPORT handler { access::placeholder IsPlaceholder = access::placeholder::false_t> void copy(accessor Src, std::shared_ptr Dst) { +#ifndef __SYCL_DEVICE_ONLY__ if (Src.is_placeholder()) checkIfPlaceholderIsBoundToHandler(Src); +#endif throwIfActionIsCreated(); static_assert(isValidTargetForExplicitOp(AccessTarget), @@ -2525,8 +2515,10 @@ class __SYCL_EXPORT handler { void copy(std::shared_ptr Src, accessor Dst) { +#ifndef __SYCL_DEVICE_ONLY__ if (Dst.is_placeholder()) checkIfPlaceholderIsBoundToHandler(Dst); +#endif throwIfActionIsCreated(); static_assert(isValidTargetForExplicitOp(AccessTarget), @@ -2554,8 +2546,10 @@ class __SYCL_EXPORT handler { access::placeholder IsPlaceholder = access::placeholder::false_t> void copy(accessor Src, T_Dst *Dst) { +#ifndef __SYCL_DEVICE_ONLY__ if (Src.is_placeholder()) checkIfPlaceholderIsBoundToHandler(Src); +#endif throwIfActionIsCreated(); static_assert(isValidTargetForExplicitOp(AccessTarget), @@ -2587,8 +2581,10 @@ class __SYCL_EXPORT handler { void copy(const T_Src *Src, accessor Dst) { +#ifndef __SYCL_DEVICE_ONLY__ if (Dst.is_placeholder()) checkIfPlaceholderIsBoundToHandler(Dst); +#endif throwIfActionIsCreated(); static_assert(isValidTargetForExplicitOp(AccessTarget), @@ -2629,10 +2625,12 @@ class __SYCL_EXPORT handler { accessor Dst) { +#ifndef __SYCL_DEVICE_ONLY__ if (Src.is_placeholder()) checkIfPlaceholderIsBoundToHandler(Src); if (Dst.is_placeholder()) checkIfPlaceholderIsBoundToHandler(Dst); +#endif throwIfActionIsCreated(); static_assert(isValidTargetForExplicitOp(AccessTarget_Src), @@ -2675,8 +2673,10 @@ class __SYCL_EXPORT handler { access::placeholder IsPlaceholder = access::placeholder::false_t> void update_host(accessor Acc) { +#ifndef __SYCL_DEVICE_ONLY__ if (Acc.is_placeholder()) checkIfPlaceholderIsBoundToHandler(Acc); +#endif throwIfActionIsCreated(); static_assert(isValidTargetForExplicitOp(AccessTarget), @@ -2707,8 +2707,10 @@ class __SYCL_EXPORT handler { fill(accessor Dst, const T &Pattern) { +#ifndef __SYCL_DEVICE_ONLY__ if (Dst.is_placeholder()) checkIfPlaceholderIsBoundToHandler(Dst); +#endif throwIfActionIsCreated(); setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); @@ -2913,9 +2915,12 @@ class __SYCL_EXPORT handler { /// \param NumBytes is a number of bytes to copy. /// \param DestOffset is the offset into \p Dest to copy to. template - void memcpy(ext::oneapi::experimental::device_global &Dest, - const void *Src, size_t NumBytes = sizeof(T), - size_t DestOffset = 0) { + void memcpy([[maybe_unused]] ext::oneapi::experimental::device_global< + T, PropertyListT> &Dest, + [[maybe_unused]] const void *Src, + [[maybe_unused]] size_t NumBytes = sizeof(T), + [[maybe_unused]] size_t DestOffset = 0) { +#ifndef __SYCL_DEVICE_ONLY__ throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_device_global>(); @@ -2935,6 +2940,7 @@ class __SYCL_EXPORT handler { } memcpyToDeviceGlobal(&Dest, Src, IsDeviceImageScoped, NumBytes, DestOffset); +#endif } /// Copies data from a device_global to USM memory. @@ -2946,10 +2952,12 @@ class __SYCL_EXPORT handler { /// \param NumBytes is a number of bytes to copy. /// \param SrcOffset is the offset into \p Src to copy from. template - void - memcpy(void *Dest, - const ext::oneapi::experimental::device_global &Src, - size_t NumBytes = sizeof(T), size_t SrcOffset = 0) { + void memcpy([[maybe_unused]] void *Dest, + [[maybe_unused]] const ext::oneapi::experimental::device_global< + T, PropertyListT> &Src, + [[maybe_unused]] size_t NumBytes = sizeof(T), + [[maybe_unused]] size_t SrcOffset = 0) { +#ifndef __SYCL_DEVICE_ONLY__ throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_device_global>(); @@ -2970,6 +2978,7 @@ class __SYCL_EXPORT handler { memcpyFromDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes, SrcOffset); +#endif } /// Copies elements of type `std::remove_all_extents_t` from a USM memory @@ -3494,21 +3503,6 @@ class __SYCL_EXPORT handler { bool IsDeviceImageScoped, size_t NumBytes, size_t Offset); - template - void checkIfPlaceholderIsBoundToHandler( - accessor - Acc) { - auto *AccBase = reinterpret_cast(&Acc); - detail::AccessorImplHost *Req = detail::getSyclObjImpl(*AccBase).get(); - if (HasAssociatedAccessor(Req, AccessTarget)) - throw sycl::exception(make_error_code(errc::kernel_argument), - "placeholder accessor must be bound by calling " - "handler::require() before it can be used."); - } - // Changing values in this will break ABI/API. enum class StableKernelCacheConfig : int32_t { Default = 0, @@ -3524,6 +3518,50 @@ class __SYCL_EXPORT handler { // Set using cuda thread block cluster launch flag and set the launch bounds. void setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims); + // Various checks that are only meaningful for host compilation, because they + // result in runtime errors (i.e. exceptions being thrown). To save time + // during device compilations (by reducing amount of templates we have to + // instantiate), those are only available during host compilation pass. +#ifndef __SYCL_DEVICE_ONLY__ + constexpr static int AccessTargetMask = 0x7ff; + /// According to section 4.7.6.11. of the SYCL specification, a local accessor + /// must not be used in a SYCL kernel function that is invoked via single_task + /// or via the simple form of parallel_for that takes a range parameter. + template + void throwOnLocalAccessorMisuse() const { + using NameT = + typename detail::get_kernel_name_t::name; + for (unsigned I = 0; I < detail::getKernelNumParams(); ++I) { + const detail::kernel_param_desc_t ParamDesc = + detail::getKernelParamDesc(I); + const detail::kernel_param_kind_t &Kind = ParamDesc.kind; + const access::target AccTarget = + static_cast(ParamDesc.info & AccessTargetMask); + if ((Kind == detail::kernel_param_kind_t::kind_accessor) && + (AccTarget == target::local)) + throw sycl::exception( + make_error_code(errc::kernel_argument), + "A local accessor must not be used in a SYCL kernel function " + "that is invoked via single_task or via the simple form of " + "parallel_for that takes a range parameter."); + } + } + + template + void checkIfPlaceholderIsBoundToHandler( + accessor + Acc) { + auto *AccBase = reinterpret_cast(&Acc); + detail::AccessorImplHost *Req = detail::getSyclObjImpl(*AccBase).get(); + if (HasAssociatedAccessor(Req, AccessTarget)) + throw sycl::exception(make_error_code(errc::kernel_argument), + "placeholder accessor must be bound by calling " + "handler::require() before it can be used."); + } + template < ext::oneapi::experimental::detail::UnsupportedGraphFeatures FeatureT> void throwIfGraphAssociated() const { @@ -3538,6 +3576,7 @@ class __SYCL_EXPORT handler { "for use with the SYCL Graph extension."); } } +#endif // Set that an ND Range was used during a call to parallel_for void setNDRangeUsed(bool Value);