diff --git a/sycl/include/sycl/detail/optional.hpp b/sycl/include/sycl/detail/optional.hpp new file mode 100644 index 0000000000000..da9ff4d900000 --- /dev/null +++ b/sycl/include/sycl/detail/optional.hpp @@ -0,0 +1,147 @@ +//==-------- optional.hpp - limited variant of std::optional -------- C++ --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===--------------------------------------------------------------------=== // + +#pragma once + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +// ABI-stable implementation of optional to avoid reliance on potentially +// differing implementations of std::optional when crossing the library +// boundary. +template class optional { +public: + constexpr optional() noexcept {} + constexpr optional(std::nullopt_t) noexcept : optional() {} + + template + constexpr optional(const optional &Other) + : ContainsValue{Other.ContainsValue} { + new (Storage) T(Other.Value); + } + template + constexpr optional(optional &&Other) + : ContainsValue{std::move(Other.ContainsValue)} { + new (Storage) T(std::move(Other.Value)); + } + + constexpr optional(T &&Value) : ContainsValue{true} { + new (Storage) T(std::move(Value)); + } + + constexpr optional(const T &Value) : ContainsValue{true} { + new (Storage) T(Value); + } + + template + constexpr optional(const std::optional &Other) : ContainsValue{Other} { + if (Other) + new (Storage) T(*Other); + } + + ~optional() { + if (has_value()) + reinterpret_cast(Storage)->~T(); + } + + optional &operator=(std::nullopt_t) noexcept { + if (has_value()) + reinterpret_cast(Storage)->~T(); + ContainsValue = false; + return *this; + } + + template optional &operator=(const optional &Other) { + if (has_value()) + reinterpret_cast(Storage)->~T(); + ContainsValue = Other; + new (Storage) T(Other.Value); + return *this; + } + template optional &operator=(optional &&Other) noexcept { + if (has_value()) + reinterpret_cast(Storage)->~T(); + ContainsValue = Other; + new (Storage) T(std::move(Other.Value)); + return *this; + } + + optional &operator=(T &&Value) { + if (has_value()) + reinterpret_cast(Storage)->~T(); + ContainsValue = true; + new (Storage) T(std::move(Value)); + return *this; + } + + optional &operator=(const T &Value) { + if (has_value()) + reinterpret_cast(Storage)->~T(); + ContainsValue = true; + new (Storage) T(Value); + return *this; + } + + template optional &operator=(const std::optional &Other) { + if (has_value()) + reinterpret_cast(Storage)->~T(); + ContainsValue = Other; + if (Other) + new (Storage) T(*Other); + return *this; + } + + constexpr bool has_value() const noexcept { return ContainsValue; } + constexpr explicit operator bool() const noexcept { return has_value(); } + + constexpr T &value() & { + if (!has_value()) + throw std::bad_optional_access{}; + return *reinterpret_cast(Storage); + } + constexpr const T &value() const & { + if (!has_value()) + throw std::bad_optional_access{}; + return *reinterpret_cast(Storage); + } + constexpr T &&value() && { + if (!has_value()) + throw std::bad_optional_access{}; + return std::move(*reinterpret_cast(Storage)); + } + constexpr const T &&value() const && { + if (!has_value()) + throw std::bad_optional_access{}; + return std::move(*reinterpret_cast(Storage)); + } + + template constexpr T value_or(U &&DefaultVal) { + return has_value() ? value() : static_cast(std::forward(DefaultVal)); + } + template constexpr T value_or(U &&DefaultVal) const { + return has_value() ? std::move(value()) + : static_cast(std::forward(DefaultVal)); + } + + constexpr T &operator*() & { return value(); } + constexpr const T &operator*() const & { return value(); } + constexpr T &&operator*() && { return value(); } + constexpr const T &&operator*() const && { return value(); } + +private: + alignas(alignof(T)) char Storage[sizeof(T)] = {0}; + bool ContainsValue = false; +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 1ed95e756ca53..9e530604ce84e 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -22,12 +22,13 @@ #include // for __SYCL_EXPORT #include // for is_queue_info_... #include // for KernelInfo -#include // for OwnerLessBase -#include // for device -#include // for device_selector -#include // for event -#include // for make_error_code -#include // for defaultAsyncHa... +#include +#include // for OwnerLessBase +#include // for device +#include // for device_selector +#include // for event +#include // for make_error_code +#include // for defaultAsyncHa... #include // for device_global #include // for device_image_s... #include // for command_graph... @@ -81,6 +82,30 @@ class queue_impl; inline event submitAssertCapture(queue &, event &, queue *, const detail::code_location &); #endif + +// Function to postprocess submitted command +// Arguments: +// bool IsKernel - true if the submitted command was kernel, false otherwise +// bool KernelUsesAssert - true if submitted kernel uses assert, only +// meaningful when IsKernel is true +// event &Event - event after which post processing should be executed +using SubmitPostProcessF = std::function; + +struct SubmissionInfoImpl; + +class __SYCL_EXPORT SubmissionInfo { +public: + SubmissionInfo(); + + sycl::detail::optional &PostProcessorFunc(); + const sycl::detail::optional &PostProcessorFunc() const; + + std::shared_ptr &SecondaryQueue(); + const std::shared_ptr &SecondaryQueue() const; + +private: + std::shared_ptr impl = nullptr; +}; } // namespace detail namespace ext ::oneapi ::experimental { @@ -340,28 +365,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { std::enable_if_t, event> submit( T CGF, const detail::code_location &CodeLoc = detail::code_location::current()) { - detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); -#if __SYCL_USE_FALLBACK_ASSERT - auto PostProcess = [this, &TlsCodeLocCapture]( - bool IsKernel, bool KernelUsesAssert, event &E) { - if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) && - KernelUsesAssert && !device_has(aspect::accelerator)) { - // __devicelib_assert_fail isn't supported by Device-side Runtime - // Linking against fallback impl of __devicelib_assert_fail is - // performed by program manager class - // Fallback assert isn't supported for FPGA - submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr, - TlsCodeLocCapture.query()); - } - }; - - return submit_impl_and_postprocess(CGF, TlsCodeLocCapture.query(), - PostProcess, - TlsCodeLocCapture.isToplevel()); -#else - return submit_impl(CGF, TlsCodeLocCapture.query(), - TlsCodeLocCapture.isToplevel()); -#endif // __SYCL_USE_FALLBACK_ASSERT + return submit_with_event(CGF, /*SecondaryQueuePtr=*/nullptr, CodeLoc); } /// Submits a command group function object to the queue, in order to be @@ -379,30 +383,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { std::enable_if_t, event> submit( T CGF, queue &SecondaryQueue, const detail::code_location &CodeLoc = detail::code_location::current()) { - detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); -#if __SYCL_USE_FALLBACK_ASSERT - auto PostProcess = [this, &SecondaryQueue, &TlsCodeLocCapture]( - bool IsKernel, bool KernelUsesAssert, event &E) { - if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) && - KernelUsesAssert && !device_has(aspect::accelerator)) { - // Only secondary queues on devices need to be added to the assert - // capture. - // __devicelib_assert_fail isn't supported by Device-side Runtime - // Linking against fallback impl of __devicelib_assert_fail is - // performed by program manager class - // Fallback assert isn't supported for FPGA - submitAssertCapture(*this, E, &SecondaryQueue, - TlsCodeLocCapture.query()); - } - }; - - return submit_impl_and_postprocess(CGF, SecondaryQueue, - TlsCodeLocCapture.query(), PostProcess, - TlsCodeLocCapture.isToplevel()); -#else - return submit_impl(CGF, SecondaryQueue, TlsCodeLocCapture.query(), - TlsCodeLocCapture.isToplevel()); -#endif // __SYCL_USE_FALLBACK_ASSERT + return submit_with_event(CGF, &SecondaryQueue, CodeLoc); } /// Prevents any commands submitted afterward to this queue from executing @@ -2770,23 +2751,84 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { queue &Q, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc); - /// A template-free version of submit. +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + /// TODO: Unused. Remove these when ABI-break window is open. event submit_impl(std::function CGH, const detail::code_location &CodeLoc); event submit_impl(std::function CGH, const detail::code_location &CodeLoc, bool IsTopCodeLoc); - /// A template-free version of submit. event submit_impl(std::function CGH, queue secondQueue, const detail::code_location &CodeLoc); event submit_impl(std::function CGH, queue secondQueue, const detail::code_location &CodeLoc, bool IsTopCodeLoc); - - /// A template-free version of submit_without_event. void submit_without_event_impl(std::function CGH, const detail::code_location &CodeLoc); void submit_without_event_impl(std::function CGH, const detail::code_location &CodeLoc, bool IsTopCodeLoc); + event + submit_impl_and_postprocess(std::function CGH, + const detail::code_location &CodeLoc, + const detail::SubmitPostProcessF &PostProcess); + event submit_impl_and_postprocess( + std::function CGH, const detail::code_location &CodeLoc, + const detail::SubmitPostProcessF &PostProcess, bool IsTopCodeLoc); + event + submit_impl_and_postprocess(std::function CGH, + queue secondQueue, + const detail::code_location &CodeLoc, + const detail::SubmitPostProcessF &PostProcess); + event submit_impl_and_postprocess( + std::function CGH, queue secondQueue, + const detail::code_location &CodeLoc, + const detail::SubmitPostProcessF &PostProcess, bool IsTopCodeLoc); +#endif // __INTEL_PREVIEW_BREAKING_CHANGES + + /// A template-free versions of submit. + event submit_with_event_impl(std::function CGH, + const detail::SubmissionInfo &SubmitInfo, + const detail::code_location &CodeLoc, + bool IsTopCodeLoc); + + /// A template-free version of submit_without_event. + void submit_without_event_impl(std::function CGH, + const detail::SubmissionInfo &SubmitInfo, + const detail::code_location &CodeLoc, + bool IsTopCodeLoc); + + /// Submits a command group function object to the queue, in order to be + /// scheduled for execution on the device. + /// + /// \param CGF is a function object containing command group. + /// \param CodeLoc is the code location of the submit call (default argument) + /// \return a SYCL event object for the submitted command group. + template + std::enable_if_t, event> + submit_with_event( + T CGF, queue *SecondaryQueuePtr, + const detail::code_location &CodeLoc = detail::code_location::current()) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + detail::SubmissionInfo SI{}; + if (SecondaryQueuePtr) + SI.SecondaryQueue() = detail::getSyclObjImpl(*SecondaryQueuePtr); +#if __SYCL_USE_FALLBACK_ASSERT + SI.PostProcessorFunc() = + [this, &SecondaryQueuePtr, + &TlsCodeLocCapture](bool IsKernel, bool KernelUsesAssert, event &E) { + if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) && + KernelUsesAssert && !device_has(aspect::accelerator)) { + // __devicelib_assert_fail isn't supported by Device-side Runtime + // Linking against fallback impl of __devicelib_assert_fail is + // performed by program manager class + // Fallback assert isn't supported for FPGA + submitAssertCapture(*this, E, SecondaryQueuePtr, + TlsCodeLocCapture.query()); + } + }; +#endif // __SYCL_USE_FALLBACK_ASSERT + return submit_with_event_impl(CGF, SI, TlsCodeLocCapture.query(), + TlsCodeLocCapture.isToplevel()); + } /// Submits a command group function object to the queue, in order to be /// scheduled for execution on the device. @@ -2796,53 +2838,18 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { template std::enable_if_t, void> submit_without_event(T CGF, const detail::code_location &CodeLoc) { - detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); #if __SYCL_USE_FALLBACK_ASSERT // If post-processing is needed, fall back to the regular submit. // TODO: Revisit whether we can avoid this. - submit(CGF, TlsCodeLocCapture.query()); + submit_with_event(CGF, nullptr, CodeLoc); #else - submit_without_event_impl(CGF, TlsCodeLocCapture.query(), + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + detail::SubmissionInfo SI{}; + submit_without_event_impl(CGF, SI, TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); #endif // __SYCL_USE_FALLBACK_ASSERT } - // Function to postprocess submitted command - // Arguments: - // bool IsKernel - true if the submitted command was kernel, false otherwise - // bool KernelUsesAssert - true if submitted kernel uses assert, only - // meaningful when IsKernel is true - // event &Event - event after which post processing should be executed - using SubmitPostProcessF = std::function; - - /// A template-free version of submit. - /// \param CGH command group function/handler - /// \param CodeLoc code location - /// - /// This method stores additional information within event_impl class instance - event submit_impl_and_postprocess(std::function CGH, - const detail::code_location &CodeLoc, - const SubmitPostProcessF &PostProcess); - event submit_impl_and_postprocess(std::function CGH, - const detail::code_location &CodeLoc, - const SubmitPostProcessF &PostProcess, - bool IsTopCodeLoc); - /// A template-free version of submit. - /// \param CGH command group function/handler - /// \param secondQueue fallback queue - /// \param CodeLoc code location - /// - /// This method stores additional information within event_impl class instance - event submit_impl_and_postprocess(std::function CGH, - queue secondQueue, - const detail::code_location &CodeLoc, - const SubmitPostProcessF &PostProcess); - event submit_impl_and_postprocess(std::function CGH, - queue secondQueue, - const detail::code_location &CodeLoc, - const SubmitPostProcessF &PostProcess, - bool IsTopCodeLoc); - /// parallel_for_impl with a kernel represented as a lambda + range that /// specifies global size only. /// @@ -3064,13 +3071,8 @@ event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue, }); }; - if (SecondaryQueue) { - CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc); - CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc); - } else { - CopierEv = Self.submit_impl(CopierCGF, CodeLoc); - CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc); - } + CopierEv = Self.submit_with_event(CopierCGF, SecondaryQueue, CodeLoc); + CheckerEv = Self.submit_with_event(CheckerCGF, SecondaryQueue, CodeLoc); return CheckerEv; } diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 428f06ea0aaa4..ab8348d3aacac 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -355,7 +355,7 @@ event queue_impl::submit_impl(const std::function &CGF, bool CallerNeedsEvent, const detail::code_location &Loc, bool IsTopCodeLoc, - const SubmitPostProcessF *PostProcess) { + const SubmissionInfo &SubmitInfo) { handler Handler(Self, PrimaryQueue, SecondaryQueue, CallerNeedsEvent); Handler.saveCodeLoc(Loc, IsTopCodeLoc); @@ -374,7 +374,9 @@ event queue_impl::submit_impl(const std::function &CGF, if (Type == CGType::Kernel) Streams = std::move(Handler.MStreamStorage); - if (PostProcess) { + if (SubmitInfo.PostProcessorFunc()) { + auto &PostProcess = *SubmitInfo.PostProcessorFunc(); + bool IsKernel = Type == CGType::Kernel; bool KernelUsesAssert = false; @@ -385,7 +387,7 @@ event queue_impl::submit_impl(const std::function &CGF, Handler.MKernelName.c_str()); finalizeHandler(Handler, Event); - (*PostProcess)(IsKernel, KernelUsesAssert, Event); + PostProcess(IsKernel, KernelUsesAssert, Event); } else finalizeHandler(Handler, Event); @@ -416,7 +418,7 @@ event queue_impl::submitWithHandler(const std::shared_ptr &Self, CGH.depends_on(DepEvents); HandlerFunc(CGH); }, - Self, /*CodeLoc*/ {}, /*IsTopCodeLoc*/ true); + Self, /*CodeLoc*/ {}, /*SubmissionInfo*/ {}, /*IsTopCodeLoc*/ true); } template diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 42e769bbe2025..2daef04280c05 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -67,6 +67,12 @@ constexpr CUDAContextT DefaultContextType = CUDAContextT::custom; enum QueueOrder { Ordered, OOO }; +// Implementation of the submission information storage. +struct SubmissionInfoImpl { + optional MPostProcessorFunc = std::nullopt; + std::shared_ptr MSecondaryQueue = nullptr; +}; + class queue_impl { public: // \return a default context for the platform if it includes the device @@ -319,8 +325,6 @@ class queue_impl { } } - using SubmitPostProcessF = std::function; - /// Submits a command group function object to the queue, in order to be /// scheduled for execution on the device. /// @@ -340,16 +344,11 @@ class queue_impl { const detail::code_location &Loc, bool IsTopCodeLoc, const SubmitPostProcessF *PostProcess = nullptr) { event ResEvent; - try { - ResEvent = submit_impl(CGF, Self, Self, SecondQueue, - /*CallerNeedsEvent=*/true, Loc, IsTopCodeLoc, - PostProcess); - } catch (...) { - ResEvent = SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue, - /*CallerNeedsEvent=*/true, Loc, - IsTopCodeLoc, PostProcess); - } - return discard_or_return(ResEvent); + SubmissionInfo SI{}; + SI.SecondaryQueue() = SecondQueue; + if (PostProcess) + SI.PostProcessorFunc() = *PostProcess; + return submit_with_event(CGF, Self, SI, Loc, IsTopCodeLoc); } /// Submits a command group function object to the queue, in order to be @@ -357,25 +356,55 @@ class queue_impl { /// /// \param CGF is a function object containing command group. /// \param Self is a shared_ptr to this queue. + /// \param SubmitInfo is additional optional information for the submission. /// \param Loc is the code location of the submit call (default argument) /// \param StoreAdditionalInfo makes additional info be stored in event_impl /// \return a SYCL event object for the submitted command group. - event submit(const std::function &CGF, - const std::shared_ptr &Self, - const detail::code_location &Loc, bool IsTopCodeLoc, - const SubmitPostProcessF *PostProcess = nullptr) { - auto ResEvent = + event submit_with_event(const std::function &CGF, + const std::shared_ptr &Self, + const SubmissionInfo &SubmitInfo, + const detail::code_location &Loc, bool IsTopCodeLoc) { + if (SubmitInfo.SecondaryQueue()) { + event ResEvent; + const std::shared_ptr SecondQueue = + SubmitInfo.SecondaryQueue(); + try { + ResEvent = submit_impl(CGF, Self, Self, SecondQueue, + /*CallerNeedsEvent=*/true, Loc, IsTopCodeLoc, + SubmitInfo); + } catch (...) { + ResEvent = SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue, + /*CallerNeedsEvent=*/true, Loc, + IsTopCodeLoc, SubmitInfo); + } + return ResEvent; + } + event ResEvent = submit_impl(CGF, Self, Self, nullptr, - /*CallerNeedsEvent=*/true, Loc, IsTopCodeLoc, PostProcess); + /*CallerNeedsEvent=*/true, Loc, IsTopCodeLoc, SubmitInfo); return discard_or_return(ResEvent); } void submit_without_event(const std::function &CGF, const std::shared_ptr &Self, - const detail::code_location &Loc, bool IsTopCodeLoc, - const SubmitPostProcessF *PostProcess = nullptr) { - submit_impl(CGF, Self, Self, nullptr, /*CallerNeedsEvent=*/false, Loc, - IsTopCodeLoc, PostProcess); + const SubmissionInfo &SubmitInfo, + const detail::code_location &Loc, + bool IsTopCodeLoc) { + if (SubmitInfo.SecondaryQueue()) { + const std::shared_ptr SecondQueue = + SubmitInfo.SecondaryQueue(); + try { + submit_impl(CGF, Self, Self, SecondQueue, + /*CallerNeedsEvent=*/false, Loc, IsTopCodeLoc, SubmitInfo); + } catch (...) { + SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue, + /*CallerNeedsEvent=*/false, Loc, IsTopCodeLoc, + SubmitInfo); + } + } else { + submit_impl(CGF, Self, Self, nullptr, /*CallerNeedsEvent=*/false, Loc, + IsTopCodeLoc, SubmitInfo); + } } /// Performs a blocking wait for the completion of all enqueued tasks in the @@ -822,13 +851,14 @@ class queue_impl { /// \param CallerNeedsEvent is a boolean indicating whether the event is /// required by the user after the call. /// \param Loc is the code location of the submit call (default argument) + /// \param SubmitInfo is additional optional information for the submission. /// \return a SYCL event representing submitted command group. event submit_impl(const std::function &CGF, const std::shared_ptr &Self, const std::shared_ptr &PrimaryQueue, const std::shared_ptr &SecondaryQueue, bool CallerNeedsEvent, const detail::code_location &Loc, - bool IsTopCodeLoc, const SubmitPostProcessF *PostProcess); + bool IsTopCodeLoc, const SubmissionInfo &SubmitInfo); /// Helper function for submitting a memory operation with a handler. /// \param Self is a shared_ptr to this queue. diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 43abe91b20014..ac7273081410a 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -20,6 +20,28 @@ namespace sycl { inline namespace _V1 { +namespace detail { +SubmissionInfo::SubmissionInfo() + : impl{std::make_shared()} {} + +optional &SubmissionInfo::PostProcessorFunc() { + return impl->MPostProcessorFunc; +} + +const optional &SubmissionInfo::PostProcessorFunc() const { + return impl->MPostProcessorFunc; +} + +std::shared_ptr &SubmissionInfo::SecondaryQueue() { + return impl->MSecondaryQueue; +} + +const std::shared_ptr & +SubmissionInfo::SecondaryQueue() const { + return impl->MSecondaryQueue; +} +} // namespace detail + queue::queue(const context &SyclContext, const device_selector &DeviceSelector, const async_handler &AsyncHandler, const property_list &PropList) { const std::vector Devs = SyclContext.get_devices(); @@ -164,14 +186,16 @@ event queue::mem_advise(const void *Ptr, size_t Length, int Advice, /*CallerNeedsEvent=*/true); } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +/// TODO: Unused. Remove these when ABI-break window is open. event queue::submit_impl(std::function CGH, const detail::code_location &CodeLoc) { - return impl->submit(CGH, impl, CodeLoc, true); + return submit_with_event_impl(CGH, {}, CodeLoc, true); } event queue::submit_impl(std::function CGH, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { - return impl->submit(CGH, impl, CodeLoc, IsTopCodeLoc); + return submit_with_event_impl(CGH, {}, CodeLoc, IsTopCodeLoc); } event queue::submit_impl(std::function CGH, queue SecondQueue, @@ -186,40 +210,57 @@ event queue::submit_impl(std::function CGH, queue SecondQueue, void queue::submit_without_event_impl(std::function CGH, const detail::code_location &CodeLoc) { - return impl->submit_without_event(CGH, impl, CodeLoc, true); + submit_without_event_impl(CGH, {}, CodeLoc, true); } void queue::submit_without_event_impl(std::function CGH, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { - return impl->submit_without_event(CGH, impl, CodeLoc, IsTopCodeLoc); + submit_without_event_impl(CGH, {}, CodeLoc, IsTopCodeLoc); } event queue::submit_impl_and_postprocess( std::function CGH, const detail::code_location &CodeLoc, - const SubmitPostProcessF &PostProcess) { - return impl->submit(CGH, impl, CodeLoc, true, &PostProcess); + const detail::SubmitPostProcessF &PostProcess) { + detail::SubmissionInfo SI{}; + SI.PostProcessorFunc() = std::move(PostProcess); + return submit_with_event_impl(CGH, SI, CodeLoc, true); } -event queue::submit_impl_and_postprocess(std::function CGH, - const detail::code_location &CodeLoc, - const SubmitPostProcessF &PostProcess, - bool IsTopCodeLoc) { - return impl->submit(CGH, impl, CodeLoc, IsTopCodeLoc, &PostProcess); +event queue::submit_impl_and_postprocess( + std::function CGH, const detail::code_location &CodeLoc, + const detail::SubmitPostProcessF &PostProcess, bool IsTopCodeLoc) { + detail::SubmissionInfo SI{}; + SI.PostProcessorFunc() = std::move(PostProcess); + return submit_with_event_impl(CGH, SI, CodeLoc, IsTopCodeLoc); } event queue::submit_impl_and_postprocess( std::function CGH, queue SecondQueue, const detail::code_location &CodeLoc, - const SubmitPostProcessF &PostProcess) { + const detail::SubmitPostProcessF &PostProcess) { return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, true, &PostProcess); } -event queue::submit_impl_and_postprocess(std::function CGH, - queue SecondQueue, - const detail::code_location &CodeLoc, - const SubmitPostProcessF &PostProcess, - bool IsTopCodeLoc) { +event queue::submit_impl_and_postprocess( + std::function CGH, queue SecondQueue, + const detail::code_location &CodeLoc, + const detail::SubmitPostProcessF &PostProcess, bool IsTopCodeLoc) { return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, IsTopCodeLoc, &PostProcess); } +#endif // __INTEL_PREVIEW_BREAKING_CHANGES + +event queue::submit_with_event_impl(std::function CGH, + const detail::SubmissionInfo &SubmitInfo, + const detail::code_location &CodeLoc, + bool IsTopCodeLoc) { + return impl->submit_with_event(CGH, impl, SubmitInfo, CodeLoc, IsTopCodeLoc); +} + +void queue::submit_without_event_impl(std::function CGH, + const detail::SubmissionInfo &SubmitInfo, + const detail::code_location &CodeLoc, + bool IsTopCodeLoc) { + impl->submit_without_event(CGH, impl, SubmitInfo, CodeLoc, IsTopCodeLoc); +} void queue::wait_proxy(const detail::code_location &CodeLoc) { impl->wait(CodeLoc); diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 621765568d50c..a5134a7a524ca 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3133,10 +3133,12 @@ _ZN4sycl3_V15queue18throw_asynchronousEv _ZN4sycl3_V15queue20memcpyToDeviceGlobalEPvPKvbmmRKSt6vectorINS0_5eventESaIS6_EE _ZN4sycl3_V15queue20wait_and_throw_proxyERKNS0_6detail13code_locationE _ZN4sycl3_V15queue22memcpyFromDeviceGlobalEPvPKvbmmRKSt6vectorINS0_5eventESaIS6_EE +_ZN4sycl3_V15queue22submit_with_event_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail14SubmissionInfoERKNS7_13code_locationEb _ZN4sycl3_V15queue25ext_oneapi_submit_barrierERKNS0_6detail13code_locationE _ZN4sycl3_V15queue25ext_oneapi_submit_barrierERKSt6vectorINS0_5eventESaIS3_EERKNS0_6detail13code_locationE _ZN4sycl3_V15queue25submit_without_event_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE _ZN4sycl3_V15queue25submit_without_event_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationEb +_ZN4sycl3_V15queue25submit_without_event_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail14SubmissionInfoERKNS7_13code_locationEb _ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE _ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEEb _ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE @@ -3249,6 +3251,10 @@ _ZN4sycl3_V16detail14tls_code_loc_tC2ERKNS1_13code_locationE _ZN4sycl3_V16detail14tls_code_loc_tC2Ev _ZN4sycl3_V16detail14tls_code_loc_tD1Ev _ZN4sycl3_V16detail14tls_code_loc_tD2Ev +_ZN4sycl3_V16detail14SubmissionInfo14SecondaryQueueEv +_ZN4sycl3_V16detail14SubmissionInfo17PostProcessorFuncEv +_ZN4sycl3_V16detail14SubmissionInfoC1Ev +_ZN4sycl3_V16detail14SubmissionInfoC2Ev _ZN4sycl3_V16detail16AccessorBaseHost10getAccDataEv _ZN4sycl3_V16detail16AccessorBaseHost14getAccessRangeEv _ZN4sycl3_V16detail16AccessorBaseHost14getMemoryRangeEv @@ -3707,6 +3713,8 @@ _ZNK4sycl3_V16detail12buffer_plain13handleReleaseEv _ZNK4sycl3_V16detail12buffer_plain15getNativeVectorENS0_7backendE _ZNK4sycl3_V16detail12buffer_plain22get_allocator_internalEv _ZNK4sycl3_V16detail12buffer_plain7getSizeEv +_ZNK4sycl3_V16detail14SubmissionInfo14SecondaryQueueEv +_ZNK4sycl3_V16detail14SubmissionInfo17PostProcessorFuncEv _ZNK4sycl3_V16detail16AccessorBaseHost11getElemSizeEv _ZNK4sycl3_V16detail16AccessorBaseHost11getPropListEv _ZNK4sycl3_V16detail16AccessorBaseHost13isPlaceholderEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index b0b7fc3f0112d..a6e6a5e47c137 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -278,6 +278,9 @@ ??0SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z ??0SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z ??0SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@V?$range@$02@23@PEAXHHV?$id@$02@23@W4image_channel_type@23@W4image_channel_order@23@Uimage_sampler@23@AEBVproperty_list@23@@Z +??0SubmissionInfo@detail@_V1@sycl@@QEAA@XZ +??0SubmissionInfo@detail@_V1@sycl@@QEAA@AEBV0123@@Z +??0SubmissionInfo@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z ??0UnsampledImageAccessorBaseHost@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@VUnsampledImageAccessorImplHost@detail@_V1@sycl@@@std@@@Z ??0UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z ??0UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z @@ -462,6 +465,7 @@ ??1LocalAccessorBaseHost@detail@_V1@sycl@@QEAA@XZ ??1SYCLCategory@detail@_V1@sycl@@UEAA@XZ ??1SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@XZ +??1SubmissionInfo@detail@_V1@sycl@@QEAA@XZ ??1UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@XZ ??1accelerator_selector@_V1@sycl@@UEAA@XZ ??1buffer_plain@detail@_V1@sycl@@QEAA@XZ @@ -525,6 +529,8 @@ ??4OSUtil@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z +??4SubmissionInfo@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z +??4SubmissionInfo@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4accelerator_selector@_V1@sycl@@QEAAAEAV012@$$QEAV012@@Z @@ -638,9 +644,13 @@ ?GDBMethodsAnchor@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@IEAAXXZ ?GetRangeRoundingSettings@handler@_V1@sycl@@AEAAXAEA_K00@Z ?HasAssociatedAccessor@handler@_V1@sycl@@AEBA_NPEAVAccessorImplHost@detail@23@W4target@access@23@@Z +?PostProcessorFunc@SubmissionInfo@detail@_V1@sycl@@QEBAAEBV?$optional@V?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@std@@@234@XZ +?PostProcessorFunc@SubmissionInfo@detail@_V1@sycl@@QEAAAEAV?$optional@V?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@std@@@234@XZ ?PushBack@exception_list@_V1@sycl@@AEAAX$$QEAVexception_ptr@std@@@Z ?PushBack@exception_list@_V1@sycl@@AEAAXAEBVexception_ptr@std@@@Z ?RangeRoundingTrace@handler@_V1@sycl@@AEAA_NXZ +?SecondaryQueue@SubmissionInfo@detail@_V1@sycl@@QEAAAEAV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@XZ +?SecondaryQueue@SubmissionInfo@detail@_V1@sycl@@QEBAAEBV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@XZ ?SetHostTask@handler@_V1@sycl@@AEAAX$$QEAV?$function@$$A6AXVinterop_handle@_V1@sycl@@@Z@std@@@Z ?SetHostTask@handler@_V1@sycl@@AEAAX$$QEAV?$function@$$A6AXXZ@std@@@Z ?__abs_diff_impl@_V1@sycl@@YA?AV?$vec@C$00@12@V312@0@Z @@ -4272,8 +4282,10 @@ ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@_N@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@_N@Z +?submit_with_event_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBVSubmissionInfo@detail@23@AEBUcode_location@823@_N@Z ?submit_without_event_impl@queue@_V1@sycl@@AEAAXV?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@@Z ?submit_without_event_impl@queue@_V1@sycl@@AEAAXV?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@_N@Z +?submit_without_event_impl@queue@_V1@sycl@@AEAAXV?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBVSubmissionInfo@detail@23@AEBUcode_location@723@_N@Z ?supportsUSMFill2D@handler@_V1@sycl@@AEAA_NXZ ?supportsUSMMemcpy2D@handler@_V1@sycl@@AEAA_NXZ ?supportsUSMMemset2D@handler@_V1@sycl@@AEAA_NXZ diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index dc959046444af..8b0144fdbf44f 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -100,6 +100,7 @@ // CHECK-NEXT: kernel_handler.hpp // CHECK-NEXT: nd_item.hpp // CHECK-NEXT: nd_range.hpp +// CHECK-NEXT: detail/optional.hpp // CHECK-NEXT: device.hpp // CHECK-NEXT: kernel_bundle_enums.hpp // CHECK-NEXT: event.hpp