Skip to content
189 changes: 93 additions & 96 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,27 @@ class queue_impl;
inline event submitAssertCapture(queue &, event &, queue *,
const detail::code_location &);
#endif

// Function to postprocess submitted command
// Arguments:
// bool IsKernel - true if the submitted command was kernel, false otherwise
// bool KernelUsesAssert - true if submitted kernel uses assert, only
// meaningful when IsKernel is true
// event &Event - event after which post processing should be executed
using SubmitPostProcessF = std::function<void(bool, bool, event &)>;

struct SubmissionInfoImpl;

class __SYCL_EXPORT SubmissionInfo {
public:
SubmissionInfo() = default;

void SetPostProcessing(const SubmitPostProcessF &PostProcessorFunc);
void
SetSecondaryQueue(const std::shared_ptr<detail::queue_impl> &SecondaryQueue);

std::shared_ptr<SubmissionInfoImpl> impl = nullptr;
};
} // namespace detail

namespace ext ::oneapi ::experimental {
Expand Down Expand Up @@ -340,28 +361,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
std::enable_if_t<std::is_invocable_r_v<void, T, handler &>, event> submit(
T CGF,
const detail::code_location &CodeLoc = detail::code_location::current()) {
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
#if __SYCL_USE_FALLBACK_ASSERT
auto PostProcess = [this, &TlsCodeLocCapture](
bool IsKernel, bool KernelUsesAssert, event &E) {
if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
KernelUsesAssert && !device_has(aspect::accelerator)) {
// __devicelib_assert_fail isn't supported by Device-side Runtime
// Linking against fallback impl of __devicelib_assert_fail is
// performed by program manager class
// Fallback assert isn't supported for FPGA
submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr,
TlsCodeLocCapture.query());
}
};

return submit_impl_and_postprocess(CGF, TlsCodeLocCapture.query(),
PostProcess,
TlsCodeLocCapture.isToplevel());
#else
return submit_impl(CGF, TlsCodeLocCapture.query(),
TlsCodeLocCapture.isToplevel());
#endif // __SYCL_USE_FALLBACK_ASSERT
return submit_with_event(CGF, /*SecondaryQueuePtr=*/nullptr, CodeLoc);
}

/// Submits a command group function object to the queue, in order to be
Expand All @@ -379,30 +379,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
std::enable_if_t<std::is_invocable_r_v<void, T, handler &>, event> submit(
T CGF, queue &SecondaryQueue,
const detail::code_location &CodeLoc = detail::code_location::current()) {
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
#if __SYCL_USE_FALLBACK_ASSERT
auto PostProcess = [this, &SecondaryQueue, &TlsCodeLocCapture](
bool IsKernel, bool KernelUsesAssert, event &E) {
if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
KernelUsesAssert && !device_has(aspect::accelerator)) {
// Only secondary queues on devices need to be added to the assert
// capture.
// __devicelib_assert_fail isn't supported by Device-side Runtime
// Linking against fallback impl of __devicelib_assert_fail is
// performed by program manager class
// Fallback assert isn't supported for FPGA
submitAssertCapture(*this, E, &SecondaryQueue,
TlsCodeLocCapture.query());
}
};

return submit_impl_and_postprocess(CGF, SecondaryQueue,
TlsCodeLocCapture.query(), PostProcess,
TlsCodeLocCapture.isToplevel());
#else
return submit_impl(CGF, SecondaryQueue, TlsCodeLocCapture.query(),
TlsCodeLocCapture.isToplevel());
#endif // __SYCL_USE_FALLBACK_ASSERT
return submit_with_event(CGF, &SecondaryQueue, CodeLoc);
}

/// Prevents any commands submitted afterward to this queue from executing
Expand Down Expand Up @@ -2770,23 +2747,83 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
queue &Q, CommandGroupFunc &&CGF,
const sycl::detail::code_location &CodeLoc);

/// A template-free version of submit.
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
/// TODO: Unused. Remove these when ABI-break window is open.
event submit_impl(std::function<void(handler &)> CGH,
const detail::code_location &CodeLoc);
event submit_impl(std::function<void(handler &)> CGH,
const detail::code_location &CodeLoc, bool IsTopCodeLoc);
/// A template-free version of submit.
event submit_impl(std::function<void(handler &)> CGH, queue secondQueue,
const detail::code_location &CodeLoc);
event submit_impl(std::function<void(handler &)> CGH, queue secondQueue,
const detail::code_location &CodeLoc, bool IsTopCodeLoc);

/// A template-free version of submit_without_event.
void submit_without_event_impl(std::function<void(handler &)> CGH,
const detail::code_location &CodeLoc);
void submit_without_event_impl(std::function<void(handler &)> CGH,
const detail::code_location &CodeLoc,
bool IsTopCodeLoc);
event
submit_impl_and_postprocess(std::function<void(handler &)> CGH,
const detail::code_location &CodeLoc,
const detail::SubmitPostProcessF &PostProcess);
event submit_impl_and_postprocess(
std::function<void(handler &)> CGH, const detail::code_location &CodeLoc,
const detail::SubmitPostProcessF &PostProcess, bool IsTopCodeLoc);
event
submit_impl_and_postprocess(std::function<void(handler &)> CGH,
queue secondQueue,
const detail::code_location &CodeLoc,
const detail::SubmitPostProcessF &PostProcess);
event submit_impl_and_postprocess(
std::function<void(handler &)> CGH, queue secondQueue,
const detail::code_location &CodeLoc,
const detail::SubmitPostProcessF &PostProcess, bool IsTopCodeLoc);
#endif // __INTEL_PREVIEW_BREAKING_CHANGES

/// A template-free versions of submit.
event submit_with_event_impl(std::function<void(handler &)> CGH,
const detail::SubmissionInfo &SubmitInfo,
const detail::code_location &CodeLoc,
bool IsTopCodeLoc);
Comment on lines +2790 to +2791
Copy link
Contributor

@aelovikov-intel aelovikov-intel Oct 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why aren't these two part of SubmitInfo? Maybe even CGH itself should go there...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The intention was for SubmissionInfo to be "free" if none of the optional arguments were passed. I suppose it could be part of the SubmissionInfo class itself, rather than its impl. Is that what you were thinking?

Of course, if there are non-optional arguments added in the future, we either have to add them to the impl of SubmissionInfo anyway, break ABI or add yet another API.


/// A template-free version of submit_without_event.
void submit_without_event_impl(std::function<void(handler &)> CGH,
const detail::SubmissionInfo &SubmitInfo,
const detail::code_location &CodeLoc,
bool IsTopCodeLoc);

/// Submits a command group function object to the queue, in order to be
/// scheduled for execution on the device.
///
/// \param CGF is a function object containing command group.
/// \param CodeLoc is the code location of the submit call (default argument)
/// \return a SYCL event object for the submitted command group.
template <typename T>
std::enable_if_t<std::is_invocable_r_v<void, T, handler &>, event>
submit_with_event(
T CGF, queue *SecondaryQueuePtr,
const detail::code_location &CodeLoc = detail::code_location::current()) {
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
detail::SubmissionInfo SI{};
if (SecondaryQueuePtr)
SI.SetSecondaryQueue(detail::getSyclObjImpl(*SecondaryQueuePtr));
#if __SYCL_USE_FALLBACK_ASSERT
SI.SetPostProcessing([this, &SecondaryQueuePtr, &TlsCodeLocCapture](
bool IsKernel, bool KernelUsesAssert, event &E) {
if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
KernelUsesAssert && !device_has(aspect::accelerator)) {
// __devicelib_assert_fail isn't supported by Device-side Runtime
// Linking against fallback impl of __devicelib_assert_fail is
// performed by program manager class
// Fallback assert isn't supported for FPGA
submitAssertCapture(*this, E, SecondaryQueuePtr,
TlsCodeLocCapture.query());
}
});
#endif // __SYCL_USE_FALLBACK_ASSERT
return submit_with_event_impl(CGF, SI, TlsCodeLocCapture.query(),
TlsCodeLocCapture.isToplevel());
}

/// Submits a command group function object to the queue, in order to be
/// scheduled for execution on the device.
Expand All @@ -2796,53 +2833,18 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
template <typename T>
std::enable_if_t<std::is_invocable_r_v<void, T, handler &>, void>
submit_without_event(T CGF, const detail::code_location &CodeLoc) {
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
#if __SYCL_USE_FALLBACK_ASSERT
// If post-processing is needed, fall back to the regular submit.
// TODO: Revisit whether we can avoid this.
submit(CGF, TlsCodeLocCapture.query());
submit_with_event(CGF, nullptr, CodeLoc);
#else
submit_without_event_impl(CGF, TlsCodeLocCapture.query(),
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
detail::SubmissionInfo SI{};
submit_without_event_impl(CGF, SI, TlsCodeLocCapture.query(),
TlsCodeLocCapture.isToplevel());
#endif // __SYCL_USE_FALLBACK_ASSERT
}

// Function to postprocess submitted command
// Arguments:
// bool IsKernel - true if the submitted command was kernel, false otherwise
// bool KernelUsesAssert - true if submitted kernel uses assert, only
// meaningful when IsKernel is true
// event &Event - event after which post processing should be executed
using SubmitPostProcessF = std::function<void(bool, bool, event &)>;

/// A template-free version of submit.
/// \param CGH command group function/handler
/// \param CodeLoc code location
///
/// This method stores additional information within event_impl class instance
event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
const detail::code_location &CodeLoc,
const SubmitPostProcessF &PostProcess);
event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
const detail::code_location &CodeLoc,
const SubmitPostProcessF &PostProcess,
bool IsTopCodeLoc);
/// A template-free version of submit.
/// \param CGH command group function/handler
/// \param secondQueue fallback queue
/// \param CodeLoc code location
///
/// This method stores additional information within event_impl class instance
event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
queue secondQueue,
const detail::code_location &CodeLoc,
const SubmitPostProcessF &PostProcess);
event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
queue secondQueue,
const detail::code_location &CodeLoc,
const SubmitPostProcessF &PostProcess,
bool IsTopCodeLoc);

/// parallel_for_impl with a kernel represented as a lambda + range that
/// specifies global size only.
///
Expand Down Expand Up @@ -3064,13 +3066,8 @@ event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue,
});
};

if (SecondaryQueue) {
CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc);
CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc);
} else {
CopierEv = Self.submit_impl(CopierCGF, CodeLoc);
CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc);
}
CopierEv = Self.submit_with_event(CopierCGF, SecondaryQueue, CodeLoc);
CheckerEv = Self.submit_with_event(CheckerCGF, SecondaryQueue, CodeLoc);

return CheckerEv;
}
Expand Down
10 changes: 6 additions & 4 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ event queue_impl::submit_impl(const std::function<void(handler &)> &CGF,
bool CallerNeedsEvent,
const detail::code_location &Loc,
bool IsTopCodeLoc,
const SubmitPostProcessF *PostProcess) {
const SubmissionInfo &SubmitInfo) {
handler Handler(Self, PrimaryQueue, SecondaryQueue, CallerNeedsEvent);
Handler.saveCodeLoc(Loc, IsTopCodeLoc);

Expand All @@ -374,7 +374,9 @@ event queue_impl::submit_impl(const std::function<void(handler &)> &CGF,
if (Type == CGType::Kernel)
Streams = std::move(Handler.MStreamStorage);

if (PostProcess) {
if (SubmitInfo.impl && SubmitInfo.impl->MPostProcessorFunc) {
auto &PostProcess = *(SubmitInfo.impl->MPostProcessorFunc);

bool IsKernel = Type == CGType::Kernel;
bool KernelUsesAssert = false;

Expand All @@ -385,7 +387,7 @@ event queue_impl::submit_impl(const std::function<void(handler &)> &CGF,
Handler.MKernelName.c_str());
finalizeHandler(Handler, Event);

(*PostProcess)(IsKernel, KernelUsesAssert, Event);
PostProcess(IsKernel, KernelUsesAssert, Event);
} else
finalizeHandler(Handler, Event);

Expand Down Expand Up @@ -416,7 +418,7 @@ event queue_impl::submitWithHandler(const std::shared_ptr<queue_impl> &Self,
CGH.depends_on(DepEvents);
HandlerFunc(CGH);
},
Self, /*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
Self, /*CodeLoc*/ {}, /*SubmissionInfo*/ {}, /*IsTopCodeLoc*/ true);
}

template <typename HandlerFuncT, typename MemOpFuncT, typename... MemOpArgTs>
Expand Down
Loading
Loading