diff --git a/sycl/include/sycl/ext/oneapi/experimental/profiling_tag.hpp b/sycl/include/sycl/ext/oneapi/experimental/profiling_tag.hpp index dec91f77a7c7c..e21ce21b2c70e 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/profiling_tag.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/profiling_tag.hpp @@ -22,6 +22,14 @@ inline event submit_profiling_tag(queue &Queue, const sycl::detail::code_location &CodeLoc = sycl::detail::code_location::current()) { if (Queue.get_device().has(aspect::ext_oneapi_queue_profiling_tag)) { + // If the queue is out-of-order and profiling is enabled, the implementation + // can save some operations by just using the required barrier event + // directly. + if (!Queue.is_in_order() && + Queue.has_property<sycl::property::queue::enable_profiling>()) + return Queue.ext_oneapi_submit_barrier(); + + // Otherwise, we use the internal implementation of the profiling tag. return Queue.submit( [=](handler &CGH) { sycl::detail::HandlerAccess::internalProfilingTagImpl(CGH); diff --git a/sycl/unittests/Extensions/ProfilingTag.cpp b/sycl/unittests/Extensions/ProfilingTag.cpp index 924eaf6bd66ff..d0d8dccfa3e93 100644 --- a/sycl/unittests/Extensions/ProfilingTag.cpp +++ b/sycl/unittests/Extensions/ProfilingTag.cpp @@ -129,6 +129,8 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedProfilingQueue) { "urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp); mock::getCallbacks().set_after_callback("urEventGetProfilingInfo", &after_urEventGetProfilingInfo); + mock::getCallbacks().set_after_callback( + "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); sycl::context Ctx{sycl::platform()}; sycl::queue Queue{Ctx, @@ -138,8 +140,11 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedProfilingQueue) { ASSERT_TRUE(Dev.has(sycl::aspect::ext_oneapi_queue_profiling_tag)); + // As an optimization, the implementation will use a single barrier when + // submitting a profiling tag on an out-of-order queue with profiling enabled. 
sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue); - ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp); + ASSERT_EQ(size_t{0}, counter_urEnqueueTimestampRecordingExp); + ASSERT_EQ(size_t{1}, counter_urEnqueueEventsWaitWithBarrier); E.get_profiling_info(); ASSERT_TRUE(LatestProfilingQuery.has_value());