From fe33437d38770d6e550b2bcb43b8f165b8388b51 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Tue, 19 Aug 2025 07:29:26 +0000 Subject: [PATCH 1/7] [SYCL][NFC] Add a new header with kernel argument extraction logic The kernel argument extraction logic has been refactored and moved to a separate header, to be used in both handler-based and handler-less kernel submission flows. --- .../oneapi/experimental/work_group_memory.hpp | 17 + sycl/include/sycl/handler.hpp | 2 +- sycl/include/sycl/stream.hpp | 13 + sycl/source/detail/arg_extraction.hpp | 356 ++++++++++++++++++ sycl/source/handler.cpp | 96 +---- 5 files changed, 402 insertions(+), 82 deletions(-) create mode 100644 sycl/source/detail/arg_extraction.hpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index ebee7791b9841..25a13e8c8a5f6 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -28,6 +28,17 @@ template struct is_unbounded_array : std::true_type {}; template inline constexpr bool is_unbounded_array_v = is_unbounded_array::value; +class NDRDescT; +class ArgDesc; +class dynamic_parameter_impl; + +void processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, + const int Size, const size_t Index, size_t &IndexShift, + bool IsKernelCreatedFromSource, bool IsESIMD, + detail::NDRDescT NDRDesc, std::vector> + DynamicParameters, std::vector &Args); + class work_group_memory_impl { public: work_group_memory_impl() : buffer_size{0} {} @@ -39,6 +50,12 @@ class work_group_memory_impl { private: size_t buffer_size; friend class sycl::handler; + friend void detail::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, + const int Size, const size_t Index, size_t &IndexShift, + bool IsKernelCreatedFromSource, bool IsESIMD, + detail::NDRDescT NDRDesc, std::vector> + DynamicParameters, std::vector &Args); }; } // namespace detail diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index bd91b9dc755ec..8e92ad27a95bc 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -499,12 +499,12 @@ class __SYCL_EXPORT handler { extractArgsAndReqsFromLambda(char *LambdaPtr, size_t KernelArgsNum, const detail::kernel_param_desc_t *KernelArgs, bool IsESIMD); -#endif /// Extracts and prepares kernel arguments from the lambda using information /// from the built-ins or integration header. void extractArgsAndReqsFromLambda( char *LambdaPtr, detail::kernel_param_desc_t (*ParamDescGetter)(int), size_t NumKernelParams, bool IsESIMD); +#endif /// Extracts and prepares kernel arguments set via set_arg(s). void extractArgsAndReqs(); diff --git a/sycl/include/sycl/stream.hpp b/sycl/include/sycl/stream.hpp index a42b6b4a4380e..1309e2a9d14c3 100644 --- a/sycl/include/sycl/stream.hpp +++ b/sycl/include/sycl/stream.hpp @@ -41,6 +41,13 @@ inline namespace _V1 { namespace detail { +void processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, + const int Size, const size_t Index, size_t &IndexShift, + bool IsKernelCreatedFromSource, bool IsESIMD, + detail::NDRDescT NDRDesc, std::vector> + DynamicParameters, std::vector &Args); + class stream_impl; using FmtFlags = unsigned int; @@ -1042,6 +1049,12 @@ class __SYCL_EXPORT __SYCL_SPECIAL_CLASS __SYCL_TYPE(stream) stream #endif friend class handler; + friend void detail::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, + const int Size, const size_t Index, size_t &IndexShift, + bool IsKernelCreatedFromSource, bool IsESIMD, + detail::NDRDescT NDRDesc, std::vector> + DynamicParameters, std::vector &Args); template friend class ext::oneapi::weak_object; diff --git a/sycl/source/detail/arg_extraction.hpp b/sycl/source/detail/arg_extraction.hpp new file mode 100644 index 0000000000000..2b52309393026 --- /dev/null +++ b/sycl/source/detail/arg_extraction.hpp @@ -0,0 +1,356 @@ +//==--- arg_extraction.hpp --- SYCL kernel argument extraction utilities ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { + +namespace ext::oneapi::experimental::detail { +class dynamic_parameter_base; +class dynamic_work_group_memory_base; +class dynamic_local_accessor_base; +class graph_impl; +class dynamic_parameter_impl; +} // namespace ext::oneapi::experimental::detail +namespace detail { + +inline constexpr size_t MaxNumAdditionalArgs = 13; +constexpr static int AccessTargetMask = 0x7ff; + +void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, + size_t &IndexShift, int Size, + bool IsKernelCreatedFromSource, size_t GlobalSize, + std::vector &Args, + bool isESIMD) { + using detail::kernel_param_kind_t; + if (AccImpl->PerWI) + AccImpl->resize(GlobalSize); + + Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size, + Index + IndexShift); + + // TODO ESIMD currently does not suport offset, memory and access ranges - + // accessor::init for ESIMD-mode accessor has a single field, translated + // to a single kernel argument set above. + if (!isESIMD && !IsKernelCreatedFromSource) { + // Dimensionality of the buffer is 1 when dimensionality of the + // accessor is 0. + const size_t SizeAccField = + sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, + &AccImpl->MAccessRange[0], SizeAccField, + Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, + &AccImpl->MMemoryRange[0], SizeAccField, + Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, + &AccImpl->MOffset[0], SizeAccField, Index + IndexShift); + } +} + +void addArgsForLocalAccessor(detail::LocalAccessorImplHost *LAcc, size_t Index, + size_t &IndexShift, bool IsKernelCreatedFromSource, + std::vector &Args, bool IsESIMD) { + using detail::kernel_param_kind_t; + + range<3> &LAccSize = LAcc->MSize; + const int Dims = LAcc->MDims; + int SizeInBytes = LAcc->MElemSize; + for (int I = 0; I < Dims; ++I) + SizeInBytes *= LAccSize[I]; + + // Some backends do not accept zero-sized local memory arguments, so we + // make it a minimum allocation of 1 byte. + SizeInBytes = std::max(SizeInBytes, 1); + Args.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, SizeInBytes, + Index + IndexShift); + // TODO ESIMD currently does not suport MSize field passing yet + // accessor::init for ESIMD-mode accessor has a single field, translated + // to a single kernel argument set above. + if (!IsESIMD && !IsKernelCreatedFromSource) { + ++IndexShift; + const size_t SizeAccField = (Dims == 0 ? 1 : Dims) * sizeof(LAccSize[0]); + Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, + SizeAccField, Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, + SizeAccField, Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, + SizeAccField, Index + IndexShift); + } +} + +void processArg( + void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, + const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, + bool IsESIMD, detail::NDRDescT NDRDesc, + std::vector> + DynamicParameters, + std::vector &Args) { + using detail::kernel_param_kind_t; + size_t GlobalSize = NDRDesc.GlobalSize[0]; + for (size_t I = 1; I < NDRDesc.Dims; ++I) { + GlobalSize *= NDRDesc.GlobalSize[I]; + } + + switch (Kind) { + case kernel_param_kind_t::kind_std_layout: + case kernel_param_kind_t::kind_pointer: { + Args.emplace_back(Kind, Ptr, Size, Index + IndexShift); + break; + } + case kernel_param_kind_t::kind_stream: { + // Stream contains several accessors inside. + stream *S = static_cast(Ptr); + + detail::AccessorBaseHost *GBufBase = + static_cast(&S->GlobalBuf); + detail::Requirement *GBufReq = &*detail::getSyclObjImpl(*GBufBase); + addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, Args, + IsESIMD); + ++IndexShift; + detail::AccessorBaseHost *GOffsetBase = + static_cast(&S->GlobalOffset); + detail::Requirement *GOffsetReq = &*detail::getSyclObjImpl(*GOffsetBase); + addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, Args, + IsESIMD); + ++IndexShift; + detail::AccessorBaseHost *GFlushBase = + static_cast(&S->GlobalFlushBuf); + detail::Requirement *GFlushReq = &*detail::getSyclObjImpl(*GFlushBase); + + // If work group size wasn't set explicitly then it must be recieved + // from kernel attribute or set to default values. + // For now we can't get this attribute here. + // So we just suppose that WG size is always default for stream. + // TODO adjust MNDRDesc when device image contains kernel's attribute + if (GlobalSize == 0) { + GlobalSize = NDRDesc.NumWorkGroups[0]; + for (size_t I = 1; I < NDRDesc.Dims; ++I) { + GlobalSize *= NDRDesc.NumWorkGroups[I]; + } + } + addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, Args, + IsESIMD); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, &S->FlushBufferSize, + sizeof(S->FlushBufferSize), Index + IndexShift); + + break; + } + case kernel_param_kind_t::kind_accessor: { + // For args kind of accessor Size is information about accessor. + // The first 11 bits of Size encodes the accessor target. + const access::target AccTarget = + static_cast(Size & AccessTargetMask); + switch (AccTarget) { + case access::target::device: + case access::target::constant_buffer: { + detail::Requirement *AccImpl = static_cast(Ptr); + addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size, + IsKernelCreatedFromSource, GlobalSize, Args, + IsESIMD); + break; + } + case access::target::local: { + detail::LocalAccessorImplHost *LAccImpl = + static_cast(Ptr); + + addArgsForLocalAccessor(LAccImpl, Index, IndexShift, + IsKernelCreatedFromSource, Args, IsESIMD); + break; + } + case access::target::image: + case access::target::image_array: { + detail::Requirement *AccImpl = static_cast(Ptr); + Args.emplace_back(Kind, AccImpl, Size, Index + IndexShift); + if (!IsKernelCreatedFromSource) { + // TODO Handle additional kernel arguments for image class + // if the compiler front-end adds them. + } + break; + } + case access::target::host_image: + case access::target::host_task: + case access::target::host_buffer: { + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported accessor target case."); + break; + } + } + break; + } + case kernel_param_kind_t::kind_dynamic_accessor: { + const access::target AccTarget = + static_cast(Size & AccessTargetMask); + switch (AccTarget) { + case access::target::local: { + + // We need to recover the inheritance layout by casting to + // dynamic_parameter_impl first. Casting directly to + // dynamic_local_accessor_impl would result in an incorrect pointer. + auto *DynParamImpl = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_impl *>(Ptr); + + DynamicParameters.emplace_back(DynParamImpl, Index + IndexShift); + + auto *DynLocalAccessorImpl = static_cast< + ext::oneapi::experimental::detail::dynamic_local_accessor_impl *>( + DynParamImpl); + + addArgsForLocalAccessor(&DynLocalAccessorImpl->LAccImplHost, Index, + IndexShift, IsKernelCreatedFromSource, Args, + IsESIMD); + break; + } + default: { + assert(false && "Unsupported dynamic accessor target"); + } + } + break; + } + case kernel_param_kind_t::kind_dynamic_work_group_memory: { + + // We need to recover the inheritance layout by casting to + // dynamic_parameter_impl first. Casting directly to + // dynamic_work_group_memory_impl would result in an incorrect pointer. + auto *DynParamImpl = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_impl *>(Ptr); + + DynamicParameters.emplace_back(DynParamImpl, Index + IndexShift); + + auto *DynWorkGroupImpl = static_cast< + ext::oneapi::experimental::detail::dynamic_work_group_memory_impl *>( + DynParamImpl); + + Args.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, + DynWorkGroupImpl->BufferSizeInBytes, Index + IndexShift); + break; + } + case kernel_param_kind_t::kind_work_group_memory: { + Args.emplace_back( + kernel_param_kind_t::kind_std_layout, nullptr, + static_cast(Ptr)->buffer_size, + Index + IndexShift); + break; + } + case kernel_param_kind_t::kind_sampler: { + Args.emplace_back(kernel_param_kind_t::kind_sampler, Ptr, sizeof(sampler), + Index + IndexShift); + break; + } + case kernel_param_kind_t::kind_specialization_constants_buffer: { + Args.emplace_back(kernel_param_kind_t::kind_specialization_constants_buffer, + Ptr, Size, Index + IndexShift); + break; + } + case kernel_param_kind_t::kind_invalid: + throw exception(make_error_code(errc::invalid), + "Invalid kernel param kind"); + break; + } +} + +void validateDynamicParameterGraphState(bool QueueHasCommandGraph, + bool IsGraphSubmission) { + if (QueueHasCommandGraph) { + throw sycl::exception( + make_error_code(errc::invalid), + "Dynamic Parameters cannot be used with Graph Queue recording."); + } + if (!IsGraphSubmission) { + throw sycl::exception( + make_error_code(errc::invalid), + "Dynamic Parameters cannot be used with normal SYCL submissions"); + } +} + +void extractArgsAndReqsFromLambda( + char *LambdaPtr, detail::kernel_param_desc_t (*ParamDescGetter)(int), + size_t NumKernelParams, bool IsESIMD, bool QueueHasCommandGraph, + bool IsGraphSubmission, detail::NDRDescT NDRDesc, + std::vector> + DynamicParameters, + std::vector &Args) { + size_t IndexShift = 0; + Args.reserve(MaxNumAdditionalArgs * NumKernelParams); + + for (size_t I = 0; I < NumKernelParams; ++I) { + detail::kernel_param_desc_t ParamDesc = ParamDescGetter(I); + void *Ptr = LambdaPtr + ParamDesc.offset; + const detail::kernel_param_kind_t &Kind = ParamDesc.kind; + const int &Size = ParamDesc.info; + if (Kind == detail::kernel_param_kind_t::kind_accessor) { + // For args kind of accessor Size is information about accessor. + // The first 11 bits of Size encodes the accessor target. + const access::target AccTarget = + static_cast(Size & AccessTargetMask); + if ((AccTarget == access::target::device || + AccTarget == access::target::constant_buffer) || + (AccTarget == access::target::image || + AccTarget == access::target::image_array)) { + detail::AccessorBaseHost *AccBase = + static_cast(Ptr); + Ptr = detail::getSyclObjImpl(*AccBase).get(); + } else if (AccTarget == access::target::local) { + detail::LocalAccessorBaseHost *LocalAccBase = + static_cast(Ptr); + Ptr = detail::getSyclObjImpl(*LocalAccBase).get(); + } + } else if (Kind == detail::kernel_param_kind_t::kind_dynamic_accessor) { + // For args kind of accessor Size is information about accessor. + // The first 11 bits of Size encodes the accessor target. + // Only local targets are supported for dynamic accessors. + assert(static_cast(Size & AccessTargetMask) == + access::target::local); + + validateDynamicParameterGraphState(QueueHasCommandGraph, + IsGraphSubmission); + + ext::oneapi::experimental::detail::dynamic_parameter_base + *DynamicParamBase = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_base *>(Ptr); + Ptr = detail::getSyclObjImpl(*DynamicParamBase).get(); + } else if (Kind == + detail::kernel_param_kind_t::kind_dynamic_work_group_memory) { + + validateDynamicParameterGraphState(QueueHasCommandGraph, + IsGraphSubmission); + + ext::oneapi::experimental::detail::dynamic_parameter_base + *DynamicParamBase = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_base *>(Ptr); + Ptr = detail::getSyclObjImpl(*DynamicParamBase).get(); + } + + processArg(Ptr, Kind, Size, I, IndexShift, + /*IsKernelCreatedFromSource=*/false, IsESIMD, NDRDesc, + DynamicParameters, Args); + } +} + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 65713eea639a2..0d69425f067d0 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -495,9 +496,12 @@ event handler::finalize() { if (type == detail::CGType::Kernel && impl->MKernelFuncPtr && (!KernelFastPath || impl->MKernelHasSpecialCaptures)) { clearArgs(); - extractArgsAndReqsFromLambda((char *)impl->MKernelFuncPtr, + detail::extractArgsAndReqsFromLambda((char *)impl->MKernelFuncPtr, impl->MKernelParamDescGetter, - impl->MKernelNumArgs, impl->MKernelIsESIMD); + impl->MKernelNumArgs, impl->MKernelIsESIMD, + Queue && Queue->hasCommandGraph(), + impl->get_graph_or_null() != nullptr, + impl->MNDRDesc, impl->MDynamicParameters, impl->MArgs); } // According to 4.7.6.9 of SYCL2020 spec, if a placeholder accessor is passed @@ -1038,76 +1042,6 @@ void handler::associateWithHandler( static_cast(AccTarget)); } -static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, - size_t &IndexShift, int Size, - bool IsKernelCreatedFromSource, - size_t GlobalSize, - std::vector &Args, - bool isESIMD) { - using detail::kernel_param_kind_t; - if (AccImpl->PerWI) - AccImpl->resize(GlobalSize); - - Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size, - Index + IndexShift); - - // TODO ESIMD currently does not suport offset, memory and access ranges - - // accessor::init for ESIMD-mode accessor has a single field, translated - // to a single kernel argument set above. - if (!isESIMD && !IsKernelCreatedFromSource) { - // Dimensionality of the buffer is 1 when dimensionality of the - // accessor is 0. - const size_t SizeAccField = - sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims); - ++IndexShift; - Args.emplace_back(kernel_param_kind_t::kind_std_layout, - &AccImpl->MAccessRange[0], SizeAccField, - Index + IndexShift); - ++IndexShift; - Args.emplace_back(kernel_param_kind_t::kind_std_layout, - &AccImpl->MMemoryRange[0], SizeAccField, - Index + IndexShift); - ++IndexShift; - Args.emplace_back(kernel_param_kind_t::kind_std_layout, - &AccImpl->MOffset[0], SizeAccField, Index + IndexShift); - } -} - -static void addArgsForLocalAccessor(detail::LocalAccessorImplHost *LAcc, - size_t Index, size_t &IndexShift, - bool IsKernelCreatedFromSource, - std::vector &Args, - bool IsESIMD) { - using detail::kernel_param_kind_t; - - range<3> &LAccSize = LAcc->MSize; - const int Dims = LAcc->MDims; - int SizeInBytes = LAcc->MElemSize; - for (int I = 0; I < Dims; ++I) - SizeInBytes *= LAccSize[I]; - - // Some backends do not accept zero-sized local memory arguments, so we - // make it a minimum allocation of 1 byte. - SizeInBytes = std::max(SizeInBytes, 1); - Args.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, SizeInBytes, - Index + IndexShift); - // TODO ESIMD currently does not suport MSize field passing yet - // accessor::init for ESIMD-mode accessor has a single field, translated - // to a single kernel argument set above. - if (!IsESIMD && !IsKernelCreatedFromSource) { - ++IndexShift; - const size_t SizeAccField = (Dims == 0 ? 1 : Dims) * sizeof(LAccSize[0]); - Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, - SizeAccField, Index + IndexShift); - ++IndexShift; - Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, - SizeAccField, Index + IndexShift); - ++IndexShift; - Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, - SizeAccField, Index + IndexShift); - } -} - void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, bool IsESIMD) { @@ -1130,14 +1064,14 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, detail::AccessorBaseHost *GBufBase = static_cast(&S->GlobalBuf); detail::Requirement *GBufReq = &*detail::getSyclObjImpl(*GBufBase); - addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size, + detail::addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size, IsKernelCreatedFromSource, GlobalSize, impl->MArgs, IsESIMD); ++IndexShift; detail::AccessorBaseHost *GOffsetBase = static_cast(&S->GlobalOffset); detail::Requirement *GOffsetReq = &*detail::getSyclObjImpl(*GOffsetBase); - addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size, + detail::addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size, IsKernelCreatedFromSource, GlobalSize, impl->MArgs, IsESIMD); ++IndexShift; @@ -1156,7 +1090,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, GlobalSize *= impl->MNDRDesc.NumWorkGroups[I]; } } - addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size, + detail::addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size, IsKernelCreatedFromSource, GlobalSize, impl->MArgs, IsESIMD); ++IndexShift; @@ -1174,7 +1108,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, case access::target::device: case access::target::constant_buffer: { detail::Requirement *AccImpl = static_cast(Ptr); - addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size, + detail::addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size, IsKernelCreatedFromSource, GlobalSize, impl->MArgs, IsESIMD); break; @@ -1183,7 +1117,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, detail::LocalAccessorImplHost *LAccImpl = static_cast(Ptr); - addArgsForLocalAccessor(LAccImpl, Index, IndexShift, + detail::addArgsForLocalAccessor(LAccImpl, Index, IndexShift, IsKernelCreatedFromSource, impl->MArgs, IsESIMD); break; } @@ -1225,7 +1159,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, ext::oneapi::experimental::detail::dynamic_local_accessor_impl *>( DynParamImpl); - addArgsForLocalAccessor(&DynLocalAccessorImpl->LAccImplHost, Index, + detail::addArgsForLocalAccessor(&DynLocalAccessorImpl->LAccImplHost, Index, IndexShift, IsKernelCreatedFromSource, impl->MArgs, IsESIMD); break; @@ -1325,6 +1259,9 @@ void handler::extractArgsAndReqs() { } } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +// TODO: Those functions are not used anymore, remove it in the next +// ABI-breaking window. void handler::extractArgsAndReqsFromLambda( char *LambdaPtr, detail::kernel_param_desc_t (*ParamDescGetter)(int), size_t NumKernelParams, bool IsESIMD) { @@ -1377,9 +1314,6 @@ void handler::extractArgsAndReqsFromLambda( } } -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES -// TODO: Those functions are not used anymore, remove it in the next -// ABI-breaking window. void handler::extractArgsAndReqsFromLambda( char *LambdaPtr, const std::vector &ParamDescs, bool IsESIMD) { From 2a22bcb5cf8af2e408925cc32b6832af772dac9c Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Tue, 19 Aug 2025 09:06:06 +0000 Subject: [PATCH 2/7] Added extractArgsAndReqs to the arg extraction header --- sycl/include/sycl/handler.hpp | 15 ++-- sycl/source/detail/arg_extraction.hpp | 23 ++++++ sycl/source/handler.cpp | 101 +++++++------------------- 3 files changed, 55 insertions(+), 84 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 8e92ad27a95bc..5dd799f800f35 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -499,24 +499,19 @@ class __SYCL_EXPORT handler { extractArgsAndReqsFromLambda(char *LambdaPtr, size_t KernelArgsNum, const detail::kernel_param_desc_t *KernelArgs, bool IsESIMD); + void processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, + const int Size, const size_t Index, size_t &IndexShift, + bool IsKernelCreatedFromSource, bool IsESIMD); +#endif + /// Extracts and prepares kernel arguments from the lambda using information /// from the built-ins or integration header. void extractArgsAndReqsFromLambda( char *LambdaPtr, detail::kernel_param_desc_t (*ParamDescGetter)(int), size_t NumKernelParams, bool IsESIMD); -#endif - /// Extracts and prepares kernel arguments set via set_arg(s). void extractArgsAndReqs(); -#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) - // TODO: processArg need not to be public - __SYCL_DLL_LOCAL -#endif - void processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, - const int Size, const size_t Index, size_t &IndexShift, - bool IsKernelCreatedFromSource, bool IsESIMD); - /// \return a string containing name of SYCL kernel. detail::ABINeutralKernelNameStrT getKernelName(); diff --git a/sycl/source/detail/arg_extraction.hpp b/sycl/source/detail/arg_extraction.hpp index 2b52309393026..4915a70f141a4 100644 --- a/sycl/source/detail/arg_extraction.hpp +++ b/sycl/source/detail/arg_extraction.hpp @@ -351,6 +351,29 @@ void extractArgsAndReqsFromLambda( } } +void extractArgsAndReqs( + bool IsKernelCreatedFromSource, detail::NDRDescT NDRDesc, + std::vector> + DynamicParameters, + std::vector &UnPreparedArgs, + std::vector &Args) { + + Args.reserve(MaxNumAdditionalArgs * UnPreparedArgs.size()); + + size_t IndexShift = 0; + for (size_t I = 0; I < UnPreparedArgs.size(); ++I) { + void *Ptr = UnPreparedArgs[I].MPtr; + const detail::kernel_param_kind_t &Kind = UnPreparedArgs[I].MType; + const int &Size = UnPreparedArgs[I].MSize; + const int Index = UnPreparedArgs[I].MIndex; + + processArg(Ptr, Kind, Size, Index, IndexShift, + /*IsKernelCreatedFromSource=*/false, /*IsESIMD=*/false, NDRDesc, + DynamicParameters, Args); + } +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 0d69425f067d0..cf22e1c0a0fba 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -496,12 +496,9 @@ event handler::finalize() { if (type == detail::CGType::Kernel && impl->MKernelFuncPtr && (!KernelFastPath || impl->MKernelHasSpecialCaptures)) { clearArgs(); - detail::extractArgsAndReqsFromLambda((char *)impl->MKernelFuncPtr, + extractArgsAndReqsFromLambda((char *)impl->MKernelFuncPtr, impl->MKernelParamDescGetter, - impl->MKernelNumArgs, impl->MKernelIsESIMD, - Queue && Queue->hasCommandGraph(), - impl->get_graph_or_null() != nullptr, - impl->MNDRDesc, impl->MDynamicParameters, impl->MArgs); + impl->MKernelNumArgs, impl->MKernelIsESIMD); } // According to 4.7.6.9 of SYCL2020 spec, if a placeholder accessor is passed @@ -1042,6 +1039,7 @@ void handler::associateWithHandler( static_cast(AccTarget)); } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, bool IsESIMD) { @@ -1210,6 +1208,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, break; } } +#endif //__INTEL_PREVIEW_BREAKING_CHANGES void handler::setArgHelper(int ArgIndex, detail::work_group_memory_impl &Arg) { impl->MWorkGroupMemoryObjects.push_back( @@ -1224,16 +1223,6 @@ void handler::setArgHelper(int ArgIndex, stream &&Str) { ArgIndex); } -// The argument can take up more space to store additional information about -// MAccessRange, MMemoryRange, and MOffset added with addArgsForGlobalAccessor. -// We use the worst-case estimate because the lifetime of the vector is short. -// In processArg the kind_stream case introduces the maximum number of -// additional arguments. The case adds additional 12 arguments to the currently -// processed argument, hence worst-case estimate is 12+1=13. -// TODO: the constant can be removed if the size of MArgs will be calculated at -// compile time. -inline constexpr size_t MaxNumAdditionalArgs = 13; - void handler::extractArgsAndReqs() { assert(MKernel && "MKernel is not initialized"); std::vector UnPreparedArgs = std::move(impl->MArgs); @@ -1246,74 +1235,38 @@ void handler::extractArgsAndReqs() { }); const bool IsKernelCreatedFromSource = MKernel->isCreatedFromSource(); - impl->MArgs.reserve(MaxNumAdditionalArgs * UnPreparedArgs.size()); - size_t IndexShift = 0; - for (size_t I = 0; I < UnPreparedArgs.size(); ++I) { - void *Ptr = UnPreparedArgs[I].MPtr; - const detail::kernel_param_kind_t &Kind = UnPreparedArgs[I].MType; - const int &Size = UnPreparedArgs[I].MSize; - const int Index = UnPreparedArgs[I].MIndex; - processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource, - false); - } + detail::extractArgsAndReqs(IsKernelCreatedFromSource, impl->MNDRDesc, + impl->MDynamicParameters, UnPreparedArgs, + impl->MArgs); } -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES -// TODO: Those functions are not used anymore, remove it in the next -// ABI-breaking window. void handler::extractArgsAndReqsFromLambda( char *LambdaPtr, detail::kernel_param_desc_t (*ParamDescGetter)(int), size_t NumKernelParams, bool IsESIMD) { - size_t IndexShift = 0; - impl->MArgs.reserve(MaxNumAdditionalArgs * NumKernelParams); - - for (size_t I = 0; I < NumKernelParams; ++I) { - detail::kernel_param_desc_t ParamDesc = ParamDescGetter(I); - void *Ptr = LambdaPtr + ParamDesc.offset; - const detail::kernel_param_kind_t &Kind = ParamDesc.kind; - const int &Size = ParamDesc.info; - if (Kind == detail::kernel_param_kind_t::kind_accessor) { - // For args kind of accessor Size is information about accessor. - // The first 11 bits of Size encodes the accessor target. - const access::target AccTarget = - static_cast(Size & AccessTargetMask); - if ((AccTarget == access::target::device || - AccTarget == access::target::constant_buffer) || - (AccTarget == access::target::image || - AccTarget == access::target::image_array)) { - detail::AccessorBaseHost *AccBase = - static_cast(Ptr); - Ptr = detail::getSyclObjImpl(*AccBase).get(); - } else if (AccTarget == access::target::local) { - detail::LocalAccessorBaseHost *LocalAccBase = - static_cast(Ptr); - Ptr = detail::getSyclObjImpl(*LocalAccBase).get(); - } - } else if (Kind == detail::kernel_param_kind_t::kind_dynamic_accessor) { - // For args kind of accessor Size is information about accessor. - // The first 11 bits of Size encodes the accessor target. - // Only local targets are supported for dynamic accessors. - assert(static_cast(Size & AccessTargetMask) == - access::target::local); - - ext::oneapi::experimental::detail::dynamic_parameter_base - *DynamicParamBase = static_cast< - ext::oneapi::experimental::detail::dynamic_parameter_base *>(Ptr); - Ptr = detail::getSyclObjImpl(*DynamicParamBase).get(); - } else if (Kind == - detail::kernel_param_kind_t::kind_dynamic_work_group_memory) { - ext::oneapi::experimental::detail::dynamic_parameter_base - *DynamicParamBase = static_cast< - ext::oneapi::experimental::detail::dynamic_parameter_base *>(Ptr); - Ptr = detail::getSyclObjImpl(*DynamicParamBase).get(); - } + detail::queue_impl *Queue = impl->get_queue_or_null(); + bool QueueHasCommandGraph = Queue && Queue->hasCommandGraph(); + bool IsGraphSubmission = impl->get_graph_or_null() != nullptr; - processArg(Ptr, Kind, Size, I, IndexShift, - /*IsKernelCreatedFromSource=*/false, IsESIMD); - } + detail::extractArgsAndReqsFromLambda( + LambdaPtr, ParamDescGetter, NumKernelParams, IsESIMD, + QueueHasCommandGraph, IsGraphSubmission, impl->MNDRDesc, + impl->MDynamicParameters, impl->MArgs); } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +// The argument can take up more space to store additional information about +// MAccessRange, MMemoryRange, and MOffset added with addArgsForGlobalAccessor. +// We use the worst-case estimate because the lifetime of the vector is short. +// In processArg the kind_stream case introduces the maximum number of +// additional arguments. The case adds additional 12 arguments to the currently +// processed argument, hence worst-case estimate is 12+1=13. +// TODO: the constant can be removed if the size of MArgs will be calculated at +// compile time. +inline constexpr size_t MaxNumAdditionalArgs = 13; + +// TODO: Those functions are not used anymore, remove it in the next +// ABI-breaking window. void handler::extractArgsAndReqsFromLambda( char *LambdaPtr, const std::vector &ParamDescs, bool IsESIMD) { From 9b187c559367ae7eeb32f86ac488ea947854c9b2 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Tue, 19 Aug 2025 09:07:50 +0000 Subject: [PATCH 3/7] Fix formatting --- .../oneapi/experimental/work_group_memory.hpp | 28 +++++++++++-------- sycl/include/sycl/stream.hpp | 28 +++++++++++-------- sycl/source/handler.cpp | 27 +++++++++--------- 3 files changed, 46 insertions(+), 37 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 25a13e8c8a5f6..644b127e9f358 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -32,12 +32,14 @@ class NDRDescT; class ArgDesc; class dynamic_parameter_impl; -void processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, - const int Size, const size_t Index, size_t &IndexShift, - bool IsKernelCreatedFromSource, bool IsESIMD, - detail::NDRDescT NDRDesc, std::vector> - DynamicParameters, std::vector &Args); +void processArg( + void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, + const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, + bool IsESIMD, detail::NDRDescT NDRDesc, + std::vector> + DynamicParameters, + std::vector &Args); class work_group_memory_impl { public: @@ -50,12 +52,14 @@ class work_group_memory_impl { private: size_t buffer_size; friend class sycl::handler; - friend void detail::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, - const int Size, const size_t Index, size_t &IndexShift, - bool IsKernelCreatedFromSource, bool IsESIMD, - detail::NDRDescT NDRDesc, std::vector> - DynamicParameters, std::vector &Args); + friend void detail::processArg( + void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, + const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, + bool IsESIMD, detail::NDRDescT NDRDesc, + std::vector> + DynamicParameters, + std::vector &Args); }; } // namespace detail diff --git a/sycl/include/sycl/stream.hpp b/sycl/include/sycl/stream.hpp index 1309e2a9d14c3..0871698a1ec4c 100644 --- a/sycl/include/sycl/stream.hpp +++ b/sycl/include/sycl/stream.hpp @@ -41,12 +41,14 @@ inline namespace _V1 { namespace detail { -void processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, - const int Size, const size_t Index, size_t &IndexShift, - bool IsKernelCreatedFromSource, bool IsESIMD, - detail::NDRDescT NDRDesc, std::vector> - DynamicParameters, std::vector &Args); +void processArg( + void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, + const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, + bool IsESIMD, detail::NDRDescT NDRDesc, + std::vector> + DynamicParameters, + std::vector &Args); class stream_impl; @@ -1049,12 +1051,14 @@ class __SYCL_EXPORT __SYCL_SPECIAL_CLASS __SYCL_TYPE(stream) stream #endif friend class handler; - friend void detail::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, - const int Size, const size_t Index, size_t &IndexShift, - bool IsKernelCreatedFromSource, bool IsESIMD, - detail::NDRDescT NDRDesc, std::vector> - DynamicParameters, std::vector &Args); + friend void detail::processArg( + void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, + const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, + bool IsESIMD, detail::NDRDescT NDRDesc, + std::vector> + DynamicParameters, + std::vector &Args); template friend class ext::oneapi::weak_object; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index cf22e1c0a0fba..d29b39a40dbc0 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -10,6 +10,7 @@ #include "ur_api.h" #include +#include #include #include #include @@ -34,7 +35,6 @@ #include #include #include -#include #include #include @@ -1063,15 +1063,15 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, static_cast(&S->GlobalBuf); detail::Requirement *GBufReq = &*detail::getSyclObjImpl(*GBufBase); detail::addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size, - IsKernelCreatedFromSource, GlobalSize, impl->MArgs, - IsESIMD); + IsKernelCreatedFromSource, GlobalSize, + impl->MArgs, IsESIMD); ++IndexShift; detail::AccessorBaseHost *GOffsetBase = static_cast(&S->GlobalOffset); detail::Requirement *GOffsetReq = &*detail::getSyclObjImpl(*GOffsetBase); detail::addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size, - IsKernelCreatedFromSource, GlobalSize, impl->MArgs, - IsESIMD); + IsKernelCreatedFromSource, GlobalSize, + impl->MArgs, IsESIMD); ++IndexShift; detail::AccessorBaseHost *GFlushBase = static_cast(&S->GlobalFlushBuf); @@ -1089,8 +1089,8 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, } } detail::addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size, - IsKernelCreatedFromSource, GlobalSize, impl->MArgs, - IsESIMD); + IsKernelCreatedFromSource, GlobalSize, + impl->MArgs, IsESIMD); ++IndexShift; addArg(kernel_param_kind_t::kind_std_layout, &S->FlushBufferSize, sizeof(S->FlushBufferSize), Index + IndexShift); @@ -1107,8 +1107,8 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, case access::target::constant_buffer: { detail::Requirement *AccImpl = static_cast(Ptr); detail::addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size, - IsKernelCreatedFromSource, GlobalSize, - impl->MArgs, IsESIMD); + IsKernelCreatedFromSource, GlobalSize, + impl->MArgs, IsESIMD); break; } case access::target::local: { @@ -1116,7 +1116,8 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, static_cast(Ptr); detail::addArgsForLocalAccessor(LAccImpl, Index, IndexShift, - IsKernelCreatedFromSource, impl->MArgs, IsESIMD); + IsKernelCreatedFromSource, impl->MArgs, + IsESIMD); break; } case access::target::image: @@ -1157,9 +1158,9 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, ext::oneapi::experimental::detail::dynamic_local_accessor_impl *>( DynParamImpl); - detail::addArgsForLocalAccessor(&DynLocalAccessorImpl->LAccImplHost, Index, - IndexShift, IsKernelCreatedFromSource, - impl->MArgs, IsESIMD); + detail::addArgsForLocalAccessor( + &DynLocalAccessorImpl->LAccImplHost, Index, IndexShift, + IsKernelCreatedFromSource, impl->MArgs, IsESIMD); break; } default: { From c6b8d77dd4351e92b0d06c300907c71c30d478d5 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Tue, 19 Aug 2025 11:16:37 +0000 Subject: [PATCH 4/7] Fix type dependencies --- .../oneapi/experimental/work_group_memory.hpp | 15 +++++++++------ sycl/include/sycl/stream.hpp | 4 ++-- sycl/source/detail/arg_extraction.hpp | 16 ++++++---------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 644b127e9f358..47bd3dad7a70a 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -20,6 +20,10 @@ namespace sycl { inline namespace _V1 { class handler; +namespace ext::oneapi::experimental::detail { +class dynamic_parameter_impl; +} + namespace detail { template struct is_unbounded_array : std::false_type {}; @@ -30,12 +34,11 @@ inline constexpr bool is_unbounded_array_v = is_unbounded_array::value; class NDRDescT; class ArgDesc; -class dynamic_parameter_impl; void processArg( - void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, + void *Ptr, const kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, - bool IsESIMD, detail::NDRDescT NDRDesc, + bool IsESIMD, NDRDescT NDRDesc, std::vector> DynamicParameters, @@ -52,10 +55,10 @@ class work_group_memory_impl { private: size_t buffer_size; friend class sycl::handler; - friend void detail::processArg( - void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, + friend void processArg( + void *Ptr, const kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, - bool IsESIMD, detail::NDRDescT NDRDesc, + bool IsESIMD, NDRDescT NDRDesc, std::vector> DynamicParameters, diff --git a/sycl/include/sycl/stream.hpp b/sycl/include/sycl/stream.hpp index 0871698a1ec4c..935f4a7b574f6 100644 --- a/sycl/include/sycl/stream.hpp +++ b/sycl/include/sycl/stream.hpp @@ -42,9 +42,9 @@ inline namespace _V1 { namespace detail { void processArg( - void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, + void *Ptr, const kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, - bool IsESIMD, detail::NDRDescT NDRDesc, + bool IsESIMD, NDRDescT NDRDesc, std::vector> DynamicParameters, diff --git a/sycl/source/detail/arg_extraction.hpp b/sycl/source/detail/arg_extraction.hpp index 4915a70f141a4..6f1a8fc8451fd 100644 --- a/sycl/source/detail/arg_extraction.hpp +++ b/sycl/source/detail/arg_extraction.hpp @@ -14,17 +14,14 @@ #include #include #include +#include +#include + +#include namespace sycl { inline namespace _V1 { -namespace ext::oneapi::experimental::detail { -class dynamic_parameter_base; -class dynamic_work_group_memory_base; -class dynamic_local_accessor_base; -class graph_impl; -class dynamic_parameter_impl; -} // namespace ext::oneapi::experimental::detail namespace detail { inline constexpr size_t MaxNumAdditionalArgs = 13; @@ -98,14 +95,13 @@ void addArgsForLocalAccessor(detail::LocalAccessorImplHost *LAcc, size_t Index, } void processArg( - void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, + void *Ptr, const kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, - bool IsESIMD, detail::NDRDescT NDRDesc, + bool IsESIMD, NDRDescT NDRDesc, std::vector> DynamicParameters, std::vector &Args) { - using detail::kernel_param_kind_t; size_t GlobalSize = NDRDesc.GlobalSize[0]; for (size_t I = 1; I < NDRDesc.Dims; ++I) { GlobalSize *= NDRDesc.GlobalSize[I]; From dbad62e0639db1ee583cd0cd1dc101ccaedc45fc Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Tue, 19 Aug 2025 11:21:39 +0000 Subject: [PATCH 5/7] Add MaxNumAdditionalArgs comment --- sycl/source/detail/arg_extraction.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sycl/source/detail/arg_extraction.hpp b/sycl/source/detail/arg_extraction.hpp index 6f1a8fc8451fd..2a2ee12886698 100644 --- a/sycl/source/detail/arg_extraction.hpp +++ b/sycl/source/detail/arg_extraction.hpp @@ -24,6 +24,14 @@ inline namespace _V1 { namespace detail { +// The argument can take up more space to store additional information about +// MAccessRange, MMemoryRange, and MOffset added with addArgsForGlobalAccessor. +// We use the worst-case estimate because the lifetime of the vector is short. +// In processArg the kind_stream case introduces the maximum number of +// additional arguments. The case adds additional 12 arguments to the currently +// processed argument, hence worst-case estimate is 12+1=13. +// TODO: the constant can be removed if the size of MArgs will be calculated at +// compile time. inline constexpr size_t MaxNumAdditionalArgs = 13; constexpr static int AccessTargetMask = 0x7ff; From a3f3fbddc49222a572c48eda856a50c35baf5a7b Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Tue, 19 Aug 2025 13:47:11 +0000 Subject: [PATCH 6/7] Inline the functions --- sycl/source/detail/arg_extraction.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/arg_extraction.hpp b/sycl/source/detail/arg_extraction.hpp index 2a2ee12886698..bcd33ac6b31c6 100644 --- a/sycl/source/detail/arg_extraction.hpp +++ b/sycl/source/detail/arg_extraction.hpp @@ -35,7 +35,7 @@ namespace detail { inline constexpr size_t MaxNumAdditionalArgs = 13; constexpr static int AccessTargetMask = 0x7ff; -void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, +inline void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, size_t &IndexShift, int Size, bool IsKernelCreatedFromSource, size_t GlobalSize, std::vector &Args, @@ -69,7 +69,7 @@ void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, } } -void addArgsForLocalAccessor(detail::LocalAccessorImplHost *LAcc, size_t Index, +inline void addArgsForLocalAccessor(detail::LocalAccessorImplHost *LAcc, size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, std::vector &Args, bool IsESIMD) { using detail::kernel_param_kind_t; @@ -102,7 +102,7 @@ void addArgsForLocalAccessor(detail::LocalAccessorImplHost *LAcc, size_t Index, } } -void processArg( +inline void processArg( void *Ptr, const kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, bool IsESIMD, NDRDescT NDRDesc, @@ -276,7 +276,7 @@ void processArg( } } -void validateDynamicParameterGraphState(bool QueueHasCommandGraph, +inline void validateDynamicParameterGraphState(bool QueueHasCommandGraph, bool IsGraphSubmission) { if (QueueHasCommandGraph) { throw sycl::exception( @@ -290,7 +290,7 @@ void validateDynamicParameterGraphState(bool QueueHasCommandGraph, } } -void extractArgsAndReqsFromLambda( +inline void extractArgsAndReqsFromLambda( char *LambdaPtr, detail::kernel_param_desc_t (*ParamDescGetter)(int), size_t NumKernelParams, bool IsESIMD, bool QueueHasCommandGraph, bool IsGraphSubmission, detail::NDRDescT NDRDesc, @@ -355,7 +355,7 @@ void extractArgsAndReqsFromLambda( } } -void extractArgsAndReqs( +inline void extractArgsAndReqs( bool IsKernelCreatedFromSource, detail::NDRDescT NDRDesc, std::vector> From a859008aa7bfb3e39b46125cd11901bc9fb36816 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Tue, 19 Aug 2025 13:52:59 +0000 Subject: [PATCH 7/7] Fix formatting --- sycl/source/detail/arg_extraction.hpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/arg_extraction.hpp b/sycl/source/detail/arg_extraction.hpp index bcd33ac6b31c6..dae8e53756a07 100644 --- a/sycl/source/detail/arg_extraction.hpp +++ b/sycl/source/detail/arg_extraction.hpp @@ -36,10 +36,11 @@ inline constexpr size_t MaxNumAdditionalArgs = 13; constexpr static int AccessTargetMask = 0x7ff; inline void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, - size_t &IndexShift, int Size, - bool IsKernelCreatedFromSource, size_t GlobalSize, - std::vector &Args, - bool isESIMD) { + size_t &IndexShift, int Size, + bool IsKernelCreatedFromSource, + size_t GlobalSize, + std::vector &Args, + bool isESIMD) { using detail::kernel_param_kind_t; if (AccImpl->PerWI) AccImpl->resize(GlobalSize); @@ -69,9 +70,11 @@ inline void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, } } -inline void addArgsForLocalAccessor(detail::LocalAccessorImplHost *LAcc, size_t Index, - size_t &IndexShift, bool IsKernelCreatedFromSource, - std::vector &Args, bool IsESIMD) { +inline void addArgsForLocalAccessor(detail::LocalAccessorImplHost *LAcc, + size_t Index, size_t &IndexShift, + bool IsKernelCreatedFromSource, + std::vector &Args, + bool IsESIMD) { using detail::kernel_param_kind_t; range<3> &LAccSize = LAcc->MSize; @@ -277,7 +280,7 @@ inline void processArg( } inline void validateDynamicParameterGraphState(bool QueueHasCommandGraph, - bool IsGraphSubmission) { + bool IsGraphSubmission) { if (QueueHasCommandGraph) { throw sycl::exception( make_error_code(errc::invalid),