diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index ebee7791b9841..47bd3dad7a70a 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -20,6 +20,10 @@ namespace sycl { inline namespace _V1 { class handler; +namespace ext::oneapi::experimental::detail { +class dynamic_parameter_impl; +} + namespace detail { template struct is_unbounded_array : std::false_type {}; @@ -28,6 +32,18 @@ template struct is_unbounded_array : std::true_type {}; template inline constexpr bool is_unbounded_array_v = is_unbounded_array::value; +class NDRDescT; +class ArgDesc; + +void processArg( + void *Ptr, const kernel_param_kind_t &Kind, const int Size, + const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, + bool IsESIMD, NDRDescT NDRDesc, + std::vector> + DynamicParameters, + std::vector &Args); + class work_group_memory_impl { public: work_group_memory_impl() : buffer_size{0} {} @@ -39,6 +55,14 @@ class work_group_memory_impl { private: size_t buffer_size; friend class sycl::handler; + friend void processArg( + void *Ptr, const kernel_param_kind_t &Kind, const int Size, + const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, + bool IsESIMD, NDRDescT NDRDesc, + std::vector> + DynamicParameters, + std::vector &Args); }; } // namespace detail diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 0185c611bec59..a7c84ceb89e48 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -501,24 +501,19 @@ class __SYCL_EXPORT handler { extractArgsAndReqsFromLambda(char *LambdaPtr, size_t KernelArgsNum, const detail::kernel_param_desc_t *KernelArgs, bool IsESIMD); + void processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, + const int Size, const size_t Index, size_t &IndexShift, + bool IsKernelCreatedFromSource, bool IsESIMD); #endif + /// Extracts and prepares kernel arguments from the lambda using information /// from the built-ins or integration header. void extractArgsAndReqsFromLambda( char *LambdaPtr, detail::kernel_param_desc_t (*ParamDescGetter)(int), size_t NumKernelParams, bool IsESIMD); - /// Extracts and prepares kernel arguments set via set_arg(s). void extractArgsAndReqs(); -#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) - // TODO: processArg need not to be public - __SYCL_DLL_LOCAL -#endif - void processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, - const int Size, const size_t Index, size_t &IndexShift, - bool IsKernelCreatedFromSource, bool IsESIMD); - /// \return a string containing name of SYCL kernel. detail::ABINeutralKernelNameStrT getKernelName(); diff --git a/sycl/include/sycl/stream.hpp b/sycl/include/sycl/stream.hpp index a42b6b4a4380e..935f4a7b574f6 100644 --- a/sycl/include/sycl/stream.hpp +++ b/sycl/include/sycl/stream.hpp @@ -41,6 +41,15 @@ inline namespace _V1 { namespace detail { +void processArg( + void *Ptr, const kernel_param_kind_t &Kind, const int Size, + const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, + bool IsESIMD, NDRDescT NDRDesc, + std::vector> + DynamicParameters, + std::vector &Args); + class stream_impl; using FmtFlags = unsigned int; @@ -1042,6 +1051,14 @@ class __SYCL_EXPORT __SYCL_SPECIAL_CLASS __SYCL_TYPE(stream) stream #endif friend class handler; + friend void detail::processArg( + void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, + const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, + bool IsESIMD, detail::NDRDescT NDRDesc, + std::vector> + DynamicParameters, + std::vector &Args); template friend class ext::oneapi::weak_object; diff --git a/sycl/source/detail/arg_extraction.hpp b/sycl/source/detail/arg_extraction.hpp new file mode 100644 index 0000000000000..dae8e53756a07 --- /dev/null +++ b/sycl/source/detail/arg_extraction.hpp @@ -0,0 +1,386 @@ +//==--- arg_extraction.hpp --- SYCL kernel argument extraction utilities ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace sycl { +inline namespace _V1 { + +namespace detail { + +// The argument can take up more space to store additional information about +// MAccessRange, MMemoryRange, and MOffset added with addArgsForGlobalAccessor. +// We use the worst-case estimate because the lifetime of the vector is short. +// In processArg the kind_stream case introduces the maximum number of +// additional arguments. The case adds additional 12 arguments to the currently +// processed argument, hence worst-case estimate is 12+1=13. +// TODO: the constant can be removed if the size of MArgs will be calculated at +// compile time. +inline constexpr size_t MaxNumAdditionalArgs = 13; +constexpr static int AccessTargetMask = 0x7ff; + +inline void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, + size_t &IndexShift, int Size, + bool IsKernelCreatedFromSource, + size_t GlobalSize, + std::vector &Args, + bool isESIMD) { + using detail::kernel_param_kind_t; + if (AccImpl->PerWI) + AccImpl->resize(GlobalSize); + + Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size, + Index + IndexShift); + + // TODO ESIMD currently does not suport offset, memory and access ranges - + // accessor::init for ESIMD-mode accessor has a single field, translated + // to a single kernel argument set above. + if (!isESIMD && !IsKernelCreatedFromSource) { + // Dimensionality of the buffer is 1 when dimensionality of the + // accessor is 0. + const size_t SizeAccField = + sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, + &AccImpl->MAccessRange[0], SizeAccField, + Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, + &AccImpl->MMemoryRange[0], SizeAccField, + Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, + &AccImpl->MOffset[0], SizeAccField, Index + IndexShift); + } +} + +inline void addArgsForLocalAccessor(detail::LocalAccessorImplHost *LAcc, + size_t Index, size_t &IndexShift, + bool IsKernelCreatedFromSource, + std::vector &Args, + bool IsESIMD) { + using detail::kernel_param_kind_t; + + range<3> &LAccSize = LAcc->MSize; + const int Dims = LAcc->MDims; + int SizeInBytes = LAcc->MElemSize; + for (int I = 0; I < Dims; ++I) + SizeInBytes *= LAccSize[I]; + + // Some backends do not accept zero-sized local memory arguments, so we + // make it a minimum allocation of 1 byte. + SizeInBytes = std::max(SizeInBytes, 1); + Args.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, SizeInBytes, + Index + IndexShift); + // TODO ESIMD currently does not suport MSize field passing yet + // accessor::init for ESIMD-mode accessor has a single field, translated + // to a single kernel argument set above. + if (!IsESIMD && !IsKernelCreatedFromSource) { + ++IndexShift; + const size_t SizeAccField = (Dims == 0 ? 1 : Dims) * sizeof(LAccSize[0]); + Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, + SizeAccField, Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, + SizeAccField, Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, + SizeAccField, Index + IndexShift); + } +} + +inline void processArg( + void *Ptr, const kernel_param_kind_t &Kind, const int Size, + const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, + bool IsESIMD, NDRDescT NDRDesc, + std::vector> + DynamicParameters, + std::vector &Args) { + size_t GlobalSize = NDRDesc.GlobalSize[0]; + for (size_t I = 1; I < NDRDesc.Dims; ++I) { + GlobalSize *= NDRDesc.GlobalSize[I]; + } + + switch (Kind) { + case kernel_param_kind_t::kind_std_layout: + case kernel_param_kind_t::kind_pointer: { + Args.emplace_back(Kind, Ptr, Size, Index + IndexShift); + break; + } + case kernel_param_kind_t::kind_stream: { + // Stream contains several accessors inside. + stream *S = static_cast(Ptr); + + detail::AccessorBaseHost *GBufBase = + static_cast(&S->GlobalBuf); + detail::Requirement *GBufReq = &*detail::getSyclObjImpl(*GBufBase); + addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, Args, + IsESIMD); + ++IndexShift; + detail::AccessorBaseHost *GOffsetBase = + static_cast(&S->GlobalOffset); + detail::Requirement *GOffsetReq = &*detail::getSyclObjImpl(*GOffsetBase); + addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, Args, + IsESIMD); + ++IndexShift; + detail::AccessorBaseHost *GFlushBase = + static_cast(&S->GlobalFlushBuf); + detail::Requirement *GFlushReq = &*detail::getSyclObjImpl(*GFlushBase); + + // If work group size wasn't set explicitly then it must be recieved + // from kernel attribute or set to default values. + // For now we can't get this attribute here. + // So we just suppose that WG size is always default for stream. + // TODO adjust MNDRDesc when device image contains kernel's attribute + if (GlobalSize == 0) { + GlobalSize = NDRDesc.NumWorkGroups[0]; + for (size_t I = 1; I < NDRDesc.Dims; ++I) { + GlobalSize *= NDRDesc.NumWorkGroups[I]; + } + } + addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, Args, + IsESIMD); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, &S->FlushBufferSize, + sizeof(S->FlushBufferSize), Index + IndexShift); + + break; + } + case kernel_param_kind_t::kind_accessor: { + // For args kind of accessor Size is information about accessor. + // The first 11 bits of Size encodes the accessor target. + const access::target AccTarget = + static_cast(Size & AccessTargetMask); + switch (AccTarget) { + case access::target::device: + case access::target::constant_buffer: { + detail::Requirement *AccImpl = static_cast(Ptr); + addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, Args, + IsESIMD); + break; + } + case access::target::local: { + detail::LocalAccessorImplHost *LAccImpl = + static_cast(Ptr); + + addArgsForLocalAccessor(LAccImpl, Index, IndexShift, + IsKernelCreatedFromSource, Args, IsESIMD); + break; + } + case access::target::image: + case access::target::image_array: { + detail::Requirement *AccImpl = static_cast(Ptr); + Args.emplace_back(Kind, AccImpl, Size, Index + IndexShift); + if (!IsKernelCreatedFromSource) { + // TODO Handle additional kernel arguments for image class + // if the compiler front-end adds them. + } + break; + } + case access::target::host_image: + case access::target::host_task: + case access::target::host_buffer: { + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported accessor target case."); + break; + } + } + break; + } + case kernel_param_kind_t::kind_dynamic_accessor: { + const access::target AccTarget = + static_cast(Size & AccessTargetMask); + switch (AccTarget) { + case access::target::local: { + + // We need to recover the inheritance layout by casting to + // dynamic_parameter_impl first. Casting directly to + // dynamic_local_accessor_impl would result in an incorrect pointer. + auto *DynParamImpl = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_impl *>(Ptr); + + DynamicParameters.emplace_back(DynParamImpl, Index + IndexShift); + + auto *DynLocalAccessorImpl = static_cast< + ext::oneapi::experimental::detail::dynamic_local_accessor_impl *>( + DynParamImpl); + + addArgsForLocalAccessor(&DynLocalAccessorImpl->LAccImplHost, Index, + IndexShift, IsKernelCreatedFromSource, Args, + IsESIMD); + break; + } + default: { + assert(false && "Unsupported dynamic accessor target"); + } + } + break; + } + case kernel_param_kind_t::kind_dynamic_work_group_memory: { + + // We need to recover the inheritance layout by casting to + // dynamic_parameter_impl first. Casting directly to + // dynamic_work_group_memory_impl would result in an incorrect pointer. + auto *DynParamImpl = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_impl *>(Ptr); + + DynamicParameters.emplace_back(DynParamImpl, Index + IndexShift); + + auto *DynWorkGroupImpl = static_cast< + ext::oneapi::experimental::detail::dynamic_work_group_memory_impl *>( + DynParamImpl); + + Args.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, + DynWorkGroupImpl->BufferSizeInBytes, Index + IndexShift); + break; + } + case kernel_param_kind_t::kind_work_group_memory: { + Args.emplace_back( + kernel_param_kind_t::kind_std_layout, nullptr, + static_cast(Ptr)->buffer_size, + Index + IndexShift); + break; + } + case kernel_param_kind_t::kind_sampler: { + Args.emplace_back(kernel_param_kind_t::kind_sampler, Ptr, sizeof(sampler), + Index + IndexShift); + break; + } + case kernel_param_kind_t::kind_specialization_constants_buffer: { + Args.emplace_back(kernel_param_kind_t::kind_specialization_constants_buffer, + Ptr, Size, Index + IndexShift); + break; + } + case kernel_param_kind_t::kind_invalid: + throw exception(make_error_code(errc::invalid), + "Invalid kernel param kind"); + break; + } +} + +inline void validateDynamicParameterGraphState(bool QueueHasCommandGraph, + bool IsGraphSubmission) { + if (QueueHasCommandGraph) { + throw sycl::exception( + make_error_code(errc::invalid), + "Dynamic Parameters cannot be used with Graph Queue recording."); + } + if (!IsGraphSubmission) { + throw sycl::exception( + make_error_code(errc::invalid), + "Dynamic Parameters cannot be used with normal SYCL submissions"); + } +} + +inline void extractArgsAndReqsFromLambda( + char *LambdaPtr, detail::kernel_param_desc_t (*ParamDescGetter)(int), + size_t NumKernelParams, bool IsESIMD, bool QueueHasCommandGraph, + bool IsGraphSubmission, detail::NDRDescT NDRDesc, + std::vector> + DynamicParameters, + std::vector &Args) { + size_t IndexShift = 0; + Args.reserve(MaxNumAdditionalArgs * NumKernelParams); + + for (size_t I = 0; I < NumKernelParams; ++I) { + detail::kernel_param_desc_t ParamDesc = ParamDescGetter(I); + void *Ptr = LambdaPtr + ParamDesc.offset; + const detail::kernel_param_kind_t &Kind = ParamDesc.kind; + const int &Size = ParamDesc.info; + if (Kind == detail::kernel_param_kind_t::kind_accessor) { + // For args kind of accessor Size is information about accessor. + // The first 11 bits of Size encodes the accessor target. + const access::target AccTarget = + static_cast(Size & AccessTargetMask); + if ((AccTarget == access::target::device || + AccTarget == access::target::constant_buffer) || + (AccTarget == access::target::image || + AccTarget == access::target::image_array)) { + detail::AccessorBaseHost *AccBase = + static_cast(Ptr); + Ptr = detail::getSyclObjImpl(*AccBase).get(); + } else if (AccTarget == access::target::local) { + detail::LocalAccessorBaseHost *LocalAccBase = + static_cast(Ptr); + Ptr = detail::getSyclObjImpl(*LocalAccBase).get(); + } + } else if (Kind == detail::kernel_param_kind_t::kind_dynamic_accessor) { + // For args kind of accessor Size is information about accessor. + // The first 11 bits of Size encodes the accessor target. + // Only local targets are supported for dynamic accessors. + assert(static_cast(Size & AccessTargetMask) == + access::target::local); + + validateDynamicParameterGraphState(QueueHasCommandGraph, + IsGraphSubmission); + + ext::oneapi::experimental::detail::dynamic_parameter_base + *DynamicParamBase = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_base *>(Ptr); + Ptr = detail::getSyclObjImpl(*DynamicParamBase).get(); + } else if (Kind == + detail::kernel_param_kind_t::kind_dynamic_work_group_memory) { + + validateDynamicParameterGraphState(QueueHasCommandGraph, + IsGraphSubmission); + + ext::oneapi::experimental::detail::dynamic_parameter_base + *DynamicParamBase = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_base *>(Ptr); + Ptr = detail::getSyclObjImpl(*DynamicParamBase).get(); + } + + processArg(Ptr, Kind, Size, I, IndexShift, + /*IsKernelCreatedFromSource=*/false, IsESIMD, NDRDesc, + DynamicParameters, Args); + } +} + +inline void extractArgsAndReqs( + bool IsKernelCreatedFromSource, detail::NDRDescT NDRDesc, + std::vector> + DynamicParameters, + std::vector &UnPreparedArgs, + std::vector &Args) { + + Args.reserve(MaxNumAdditionalArgs * UnPreparedArgs.size()); + + size_t IndexShift = 0; + for (size_t I = 0; I < UnPreparedArgs.size(); ++I) { + void *Ptr = UnPreparedArgs[I].MPtr; + const detail::kernel_param_kind_t &Kind = UnPreparedArgs[I].MType; + const int &Size = UnPreparedArgs[I].MSize; + const int Index = UnPreparedArgs[I].MIndex; + + processArg(Ptr, Kind, Size, Index, IndexShift, + /*IsKernelCreatedFromSource=*/false, /*IsESIMD=*/false, NDRDesc, + DynamicParameters, Args); + } +} + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index f9d1769e573e7..11d6ffb82ad6d 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -10,6 +10,7 @@ #include "ur_api.h" #include +#include #include #include #include @@ -1039,76 +1040,7 @@ void handler::associateWithHandler( static_cast(AccTarget)); } -static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, - size_t &IndexShift, int Size, - bool IsKernelCreatedFromSource, - size_t GlobalSize, - std::vector &Args, - bool isESIMD) { - using detail::kernel_param_kind_t; - if (AccImpl->PerWI) - AccImpl->resize(GlobalSize); - - Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size, - Index + IndexShift); - - // TODO ESIMD currently does not suport offset, memory and access ranges - - // accessor::init for ESIMD-mode accessor has a single field, translated - // to a single kernel argument set above. - if (!isESIMD && !IsKernelCreatedFromSource) { - // Dimensionality of the buffer is 1 when dimensionality of the - // accessor is 0. - const size_t SizeAccField = - sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims); - ++IndexShift; - Args.emplace_back(kernel_param_kind_t::kind_std_layout, - &AccImpl->MAccessRange[0], SizeAccField, - Index + IndexShift); - ++IndexShift; - Args.emplace_back(kernel_param_kind_t::kind_std_layout, - &AccImpl->MMemoryRange[0], SizeAccField, - Index + IndexShift); - ++IndexShift; - Args.emplace_back(kernel_param_kind_t::kind_std_layout, - &AccImpl->MOffset[0], SizeAccField, Index + IndexShift); - } -} - -static void addArgsForLocalAccessor(detail::LocalAccessorImplHost *LAcc, - size_t Index, size_t &IndexShift, - bool IsKernelCreatedFromSource, - std::vector &Args, - bool IsESIMD) { - using detail::kernel_param_kind_t; - - range<3> &LAccSize = LAcc->MSize; - const int Dims = LAcc->MDims; - int SizeInBytes = LAcc->MElemSize; - for (int I = 0; I < Dims; ++I) - SizeInBytes *= LAccSize[I]; - - // Some backends do not accept zero-sized local memory arguments, so we - // make it a minimum allocation of 1 byte. - SizeInBytes = std::max(SizeInBytes, 1); - Args.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, SizeInBytes, - Index + IndexShift); - // TODO ESIMD currently does not suport MSize field passing yet - // accessor::init for ESIMD-mode accessor has a single field, translated - // to a single kernel argument set above. - if (!IsESIMD && !IsKernelCreatedFromSource) { - ++IndexShift; - const size_t SizeAccField = (Dims == 0 ? 1 : Dims) * sizeof(LAccSize[0]); - Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, - SizeAccField, Index + IndexShift); - ++IndexShift; - Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, - SizeAccField, Index + IndexShift); - ++IndexShift; - Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, - SizeAccField, Index + IndexShift); - } -} - +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, bool IsESIMD) { @@ -1131,16 +1063,16 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, detail::AccessorBaseHost *GBufBase = static_cast(&S->GlobalBuf); detail::Requirement *GBufReq = &*detail::getSyclObjImpl(*GBufBase); - addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size, - IsKernelCreatedFromSource, GlobalSize, impl->MArgs, - IsESIMD); + detail::addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, + impl->MArgs, IsESIMD); ++IndexShift; detail::AccessorBaseHost *GOffsetBase = static_cast(&S->GlobalOffset); detail::Requirement *GOffsetReq = &*detail::getSyclObjImpl(*GOffsetBase); - addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size, - IsKernelCreatedFromSource, GlobalSize, impl->MArgs, - IsESIMD); + detail::addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, + impl->MArgs, IsESIMD); ++IndexShift; detail::AccessorBaseHost *GFlushBase = static_cast(&S->GlobalFlushBuf); @@ -1157,9 +1089,9 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, GlobalSize *= impl->MNDRDesc.NumWorkGroups[I]; } } - addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size, - IsKernelCreatedFromSource, GlobalSize, impl->MArgs, - IsESIMD); + detail::addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, + impl->MArgs, IsESIMD); ++IndexShift; addArg(kernel_param_kind_t::kind_std_layout, &S->FlushBufferSize, sizeof(S->FlushBufferSize), Index + IndexShift); @@ -1175,17 +1107,18 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, case access::target::device: case access::target::constant_buffer: { detail::Requirement *AccImpl = static_cast(Ptr); - addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size, - IsKernelCreatedFromSource, GlobalSize, - impl->MArgs, IsESIMD); + detail::addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, + impl->MArgs, IsESIMD); break; } case access::target::local: { detail::LocalAccessorImplHost *LAccImpl = static_cast(Ptr); - addArgsForLocalAccessor(LAccImpl, Index, IndexShift, - IsKernelCreatedFromSource, impl->MArgs, IsESIMD); + detail::addArgsForLocalAccessor(LAccImpl, Index, IndexShift, + IsKernelCreatedFromSource, impl->MArgs, + IsESIMD); break; } case access::target::image: @@ -1226,9 +1159,9 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, ext::oneapi::experimental::detail::dynamic_local_accessor_impl *>( DynParamImpl); - addArgsForLocalAccessor(&DynLocalAccessorImpl->LAccImplHost, Index, - IndexShift, IsKernelCreatedFromSource, - impl->MArgs, IsESIMD); + detail::addArgsForLocalAccessor( + &DynLocalAccessorImpl->LAccImplHost, Index, IndexShift, + IsKernelCreatedFromSource, impl->MArgs, IsESIMD); break; } default: { @@ -1277,6 +1210,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, break; } } +#endif //__INTEL_PREVIEW_BREAKING_CHANGES void handler::setArgHelper(int ArgIndex, detail::work_group_memory_impl &Arg) { impl->MWorkGroupMemoryObjects.push_back( @@ -1291,16 +1225,6 @@ void handler::setArgHelper(int ArgIndex, stream &&Str) { ArgIndex); } -// The argument can take up more space to store additional information about -// MAccessRange, MMemoryRange, and MOffset added with addArgsForGlobalAccessor. -// We use the worst-case estimate because the lifetime of the vector is short. -// In processArg the kind_stream case introduces the maximum number of -// additional arguments. The case adds additional 12 arguments to the currently -// processed argument, hence worst-case estimate is 12+1=13. -// TODO: the constant can be removed if the size of MArgs will be calculated at -// compile time. -inline constexpr size_t MaxNumAdditionalArgs = 13; - void handler::extractArgsAndReqs() { assert(MKernel && "MKernel is not initialized"); std::vector UnPreparedArgs = std::move(impl->MArgs); @@ -1313,72 +1237,36 @@ void handler::extractArgsAndReqs() { }); const bool IsKernelCreatedFromSource = MKernel->isCreatedFromSource(); - impl->MArgs.reserve(MaxNumAdditionalArgs * UnPreparedArgs.size()); - size_t IndexShift = 0; - for (size_t I = 0; I < UnPreparedArgs.size(); ++I) { - void *Ptr = UnPreparedArgs[I].MPtr; - const detail::kernel_param_kind_t &Kind = UnPreparedArgs[I].MType; - const int &Size = UnPreparedArgs[I].MSize; - const int Index = UnPreparedArgs[I].MIndex; - processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource, - false); - } + detail::extractArgsAndReqs(IsKernelCreatedFromSource, impl->MNDRDesc, + impl->MDynamicParameters, UnPreparedArgs, + impl->MArgs); } void handler::extractArgsAndReqsFromLambda( char *LambdaPtr, detail::kernel_param_desc_t (*ParamDescGetter)(int), size_t NumKernelParams, bool IsESIMD) { - size_t IndexShift = 0; - impl->MArgs.reserve(MaxNumAdditionalArgs * NumKernelParams); - - for (size_t I = 0; I < NumKernelParams; ++I) { - detail::kernel_param_desc_t ParamDesc = ParamDescGetter(I); - void *Ptr = LambdaPtr + ParamDesc.offset; - const detail::kernel_param_kind_t &Kind = ParamDesc.kind; - const int &Size = ParamDesc.info; - if (Kind == detail::kernel_param_kind_t::kind_accessor) { - // For args kind of accessor Size is information about accessor. - // The first 11 bits of Size encodes the accessor target. - const access::target AccTarget = - static_cast(Size & AccessTargetMask); - if ((AccTarget == access::target::device || - AccTarget == access::target::constant_buffer) || - (AccTarget == access::target::image || - AccTarget == access::target::image_array)) { - detail::AccessorBaseHost *AccBase = - static_cast(Ptr); - Ptr = detail::getSyclObjImpl(*AccBase).get(); - } else if (AccTarget == access::target::local) { - detail::LocalAccessorBaseHost *LocalAccBase = - static_cast(Ptr); - Ptr = detail::getSyclObjImpl(*LocalAccBase).get(); - } - } else if (Kind == detail::kernel_param_kind_t::kind_dynamic_accessor) { - // For args kind of accessor Size is information about accessor. - // The first 11 bits of Size encodes the accessor target. - // Only local targets are supported for dynamic accessors. - assert(static_cast(Size & AccessTargetMask) == - access::target::local); - - ext::oneapi::experimental::detail::dynamic_parameter_base - *DynamicParamBase = static_cast< - ext::oneapi::experimental::detail::dynamic_parameter_base *>(Ptr); - Ptr = detail::getSyclObjImpl(*DynamicParamBase).get(); - } else if (Kind == - detail::kernel_param_kind_t::kind_dynamic_work_group_memory) { - ext::oneapi::experimental::detail::dynamic_parameter_base - *DynamicParamBase = static_cast< - ext::oneapi::experimental::detail::dynamic_parameter_base *>(Ptr); - Ptr = detail::getSyclObjImpl(*DynamicParamBase).get(); - } + detail::queue_impl *Queue = impl->get_queue_or_null(); + bool QueueHasCommandGraph = Queue && Queue->hasCommandGraph(); + bool IsGraphSubmission = impl->get_graph_or_null() != nullptr; - processArg(Ptr, Kind, Size, I, IndexShift, - /*IsKernelCreatedFromSource=*/false, IsESIMD); - } + detail::extractArgsAndReqsFromLambda( + LambdaPtr, ParamDescGetter, NumKernelParams, IsESIMD, + QueueHasCommandGraph, IsGraphSubmission, impl->MNDRDesc, + impl->MDynamicParameters, impl->MArgs); } #ifndef __INTEL_PREVIEW_BREAKING_CHANGES +// The argument can take up more space to store additional information about +// MAccessRange, MMemoryRange, and MOffset added with addArgsForGlobalAccessor. +// We use the worst-case estimate because the lifetime of the vector is short. +// In processArg the kind_stream case introduces the maximum number of +// additional arguments. The case adds additional 12 arguments to the currently +// processed argument, hence worst-case estimate is 12+1=13. +// TODO: the constant can be removed if the size of MArgs will be calculated at +// compile time. +inline constexpr size_t MaxNumAdditionalArgs = 13; + // TODO: Those functions are not used anymore, remove it in the next // ABI-breaking window. void handler::extractArgsAndReqsFromLambda(