@@ -75,10 +75,8 @@ auto get_native(const SyclObjectT &Obj)
7575namespace detail {
7676class queue_impl ;
7777
78- #if __SYCL_USE_FALLBACK_ASSERT
7978inline event submitAssertCapture (queue &, event &, queue *,
8079 const detail::code_location &);
81- #endif
8280
8381// Function to postprocess submitted command
8482// Arguments:
@@ -375,8 +373,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
375373 std::enable_if_t <std::is_invocable_r_v<void , T, handler &>, event> submit (
376374 T CGF,
377375 const detail::code_location &CodeLoc = detail::code_location::current()) {
378- return submit_with_event (
379- sycl::ext::oneapi::experimental::empty_properties_t {}, CGF,
376+ return submit_with_event<__SYCL_USE_FALLBACK_ASSERT>(
377+ sycl::ext::oneapi::experimental::empty_properties_t {},
378+ detail::type_erased_cgfo_ty{CGF},
380379 /* SecondaryQueuePtr=*/ nullptr , CodeLoc);
381380 }
382381
@@ -395,9 +394,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
395394 std::enable_if_t <std::is_invocable_r_v<void , T, handler &>, event> submit (
396395 T CGF, queue &SecondaryQueue,
397396 const detail::code_location &CodeLoc = detail::code_location::current()) {
398- return submit_with_event (
399- sycl::ext::oneapi::experimental::empty_properties_t {}, CGF,
400- &SecondaryQueue, CodeLoc);
397+ return submit_with_event<__SYCL_USE_FALLBACK_ASSERT> (
398+ sycl::ext::oneapi::experimental::empty_properties_t {},
399+ detail::type_erased_cgfo_ty{CGF}, &SecondaryQueue, CodeLoc);
401400 }
402401
403402 // / Prevents any commands submitted afterward to this queue from executing
@@ -2786,6 +2785,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
27862785
27872786#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
27882787 // / TODO: Unused. Remove these when ABI-break window is open.
2788+ // / Not using `type_erased_cgfo_ty` on purpose.
27892789 event submit_impl (std::function<void (handler &)> CGH,
27902790 const detail::code_location &CodeLoc);
27912791 event submit_impl (std::function<void (handler &)> CGH,
@@ -2815,16 +2815,28 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
28152815 std::function<void (handler &)> CGH, queue secondQueue,
28162816 const detail::code_location &CodeLoc,
28172817 const detail::SubmitPostProcessF &PostProcess, bool IsTopCodeLoc);
2818+
2819+ // Old versions from when `std::function<void(handler &)>` was used in place
2820+ // of `detail::type_erased_cgfo_ty`.
2821+ event submit_with_event_impl (std::function<void (handler &)> CGH,
2822+ const detail::SubmissionInfo &SubmitInfo,
2823+ const detail::code_location &CodeLoc,
2824+ bool IsTopCodeLoc);
2825+
2826+ void submit_without_event_impl (std::function<void (handler &)> CGH,
2827+ const detail::SubmissionInfo &SubmitInfo,
2828+ const detail::code_location &CodeLoc,
2829+ bool IsTopCodeLoc);
28182830#endif // __INTEL_PREVIEW_BREAKING_CHANGES
28192831
28202832 // / A template-free version of submit.
2821- event submit_with_event_impl (std::function< void (handler &)> CGH,
2833+ event submit_with_event_impl (const detail::type_erased_cgfo_ty & CGH,
28222834 const detail::SubmissionInfo &SubmitInfo,
28232835 const detail::code_location &CodeLoc,
28242836 bool IsTopCodeLoc);
28252837
28262838 // / A template-free version of submit_without_event.
2827- void submit_without_event_impl (std::function< void (handler &)> CGH,
2839+ void submit_without_event_impl (const detail::type_erased_cgfo_ty & CGH,
28282840 const detail::SubmissionInfo &SubmitInfo,
28292841 const detail::code_location &CodeLoc,
28302842 bool IsTopCodeLoc);
@@ -2836,32 +2848,35 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
28362848 // / \param CGF is a function object containing command group.
28372849 // / \param CodeLoc is the code location of the submit call (default argument)
28382850 // / \return a SYCL event object for the submitted command group.
2839- template <typename T, typename PropertiesT>
2840- std::enable_if_t <std::is_invocable_r_v<void , T, handler &>, event>
2841- submit_with_event (
2842- PropertiesT Props, T CGF, queue *SecondaryQueuePtr,
2851+ //
2852+ // UseFallbackAssert is a template parameter rather than an `#if` in the
2853+ // function body to prevent ODR violations between TUs built with different
2854+ // fallback assert modes.
2855+ template <bool UseFallbackAssert, typename PropertiesT>
2856+ event submit_with_event (
2857+ PropertiesT Props, const detail::type_erased_cgfo_ty &CGF,
2858+ queue *SecondaryQueuePtr,
28432859 const detail::code_location &CodeLoc = detail::code_location::current()) {
28442860 detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
28452861 detail::SubmissionInfo SI{};
28462862 ProcessSubmitProperties (Props, SI);
28472863 if (SecondaryQueuePtr)
28482864 SI.SecondaryQueue () = detail::getSyclObjImpl (*SecondaryQueuePtr);
2849- #if __SYCL_USE_FALLBACK_ASSERT
2850- SI.PostProcessorFunc () =
2851- [this , &SecondaryQueuePtr,
2852- &TlsCodeLocCapture](bool IsKernel, bool KernelUsesAssert, event &E) {
2853- if (IsKernel && !device_has (aspect::ext_oneapi_native_assert) &&
2854- KernelUsesAssert && !device_has (aspect::accelerator)) {
2855- // __devicelib_assert_fail isn't supported by Device-side Runtime
2856- // Linking against fallback impl of __devicelib_assert_fail is
2857- // performed by program manager class
2858- // Fallback assert isn't supported for FPGA
2859- submitAssertCapture (*this , E, SecondaryQueuePtr,
2860- TlsCodeLocCapture.query ());
2861- }
2862- };
2863- #endif // __SYCL_USE_FALLBACK_ASSERT
2864- return submit_with_event_impl (std::move (CGF), SI, TlsCodeLocCapture.query (),
2865+ if constexpr (UseFallbackAssert)
2866+ SI.PostProcessorFunc () =
2867+ [this , &SecondaryQueuePtr,
2868+ &TlsCodeLocCapture](bool IsKernel, bool KernelUsesAssert, event &E) {
2869+ if (IsKernel && !device_has (aspect::ext_oneapi_native_assert) &&
2870+ KernelUsesAssert && !device_has (aspect::accelerator)) {
2871+ // __devicelib_assert_fail isn't supported by Device-side Runtime.
2872+ // Linking against fallback impl of __devicelib_assert_fail is
2873+ // performed by program manager class.
2874+ // Fallback assert isn't supported for FPGA.
2875+ submitAssertCapture (*this , E, SecondaryQueuePtr,
2876+ TlsCodeLocCapture.query ());
2877+ }
2878+ };
2879+ return submit_with_event_impl (CGF, SI, TlsCodeLocCapture.query (),
28652880 TlsCodeLocCapture.isToplevel ());
28662881 }
28672882
@@ -2871,21 +2886,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
28712886 // / \param Props is a property list with submission properties.
28722887 // / \param CGF is a function object containing command group.
28732888 // / \param CodeLoc is the code location of the submit call (default argument)
2874- template <typename T, typename PropertiesT>
2875- std::enable_if_t <std::is_invocable_r_v<void , T, handler &>, void >
2876- submit_without_event (PropertiesT Props, T CGF,
2877- const detail::code_location &CodeLoc) {
2878- #if __SYCL_USE_FALLBACK_ASSERT
2879- // If post-processing is needed, fall back to the regular submit.
2880- // TODO: Revisit whether we can avoid this.
2881- submit_with_event (Props, CGF, nullptr , CodeLoc);
2882- #else
2883- detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
2884- detail::SubmissionInfo SI{};
2885- ProcessSubmitProperties (Props, SI);
2886- submit_without_event_impl (CGF, SI, TlsCodeLocCapture.query (),
2887- TlsCodeLocCapture.isToplevel ());
2888- #endif // __SYCL_USE_FALLBACK_ASSERT
2889+ //
2890+ // UseFallbackAssert is a template parameter rather than an `#if` in the
2891+ // function body to prevent ODR violations between TUs built with different
2892+ // fallback assert modes.
2893+ template <bool UseFallbackAssert, typename PropertiesT>
2894+ void submit_without_event (PropertiesT Props,
2895+ const detail::type_erased_cgfo_ty &CGF,
2896+ const detail::code_location &CodeLoc) {
2897+ if constexpr (UseFallbackAssert) {
2898+ // If post-processing is needed, fall back to the regular submit.
2899+ // TODO: Revisit whether we can avoid this.
2900+ submit_with_event<UseFallbackAssert>(Props, CGF, nullptr , CodeLoc);
2901+ } else {
2902+ detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
2903+ detail::SubmissionInfo SI{};
2904+ ProcessSubmitProperties (Props, SI);
2905+ submit_without_event_impl (CGF, SI, TlsCodeLocCapture.query (),
2906+ TlsCodeLocCapture.isToplevel ());
2907+ }
28892908 }
28902909
28912910 // / parallel_for_impl with a kernel represented as a lambda + range that
@@ -3114,10 +3133,10 @@ event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue,
31143133 });
31153134 };
31163135
3117- CopierEv = Self.submit_with_event (
3136+ CopierEv = Self.submit_with_event < true > (
31183137 sycl::ext::oneapi::experimental::empty_properties_t {}, CopierCGF,
31193138 SecondaryQueue, CodeLoc);
3120- CheckerEv = Self.submit_with_event (
3139+ CheckerEv = Self.submit_with_event < true > (
31213140 sycl::ext::oneapi::experimental::empty_properties_t {}, CheckerCGF,
31223141 SecondaryQueue, CodeLoc);
31233142
0 commit comments