// Patch summary: in the Level Zero v2 adapter, each command buffer now tracks
// the event of its latest enqueue (currentExecution), and every new enqueue of
// that command buffer waits on it through one extra wait-list entry.
// NOTE(review): this copy of the patch has its line breaks collapsed, and
// template argument lists look stripped (e.g. `static_cast(...)`,
// `template struct`); that text is kept exactly as found - confirm against
// the original patch before applying.
diff --git a/unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp b/unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp index bd1831340c059..254cc3448c9bb 100644 --- a/unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp @@ -47,7 +47,36 @@ ur_result_t ur_exp_command_buffer_handle_t_::finalizeCommandBuffer() { isFinalized = true; return UR_RESULT_SUCCESS; } +// Returns the event stored by the last registerExecutionEventUnlocked() call, +// or nullptr when none is stored. No reference is added for the caller. +ur_event_handle_t ur_exp_command_buffer_handle_t_::getExecutionEventUnlocked() { + return currentExecution; +} + +// Replaces the tracked execution event with nextExecutionEvent (may be null), +// keeping one reference on the tracked event for as long as it is stored. +ur_result_t ur_exp_command_buffer_handle_t_::registerExecutionEventUnlocked( + ur_event_handle_t nextExecutionEvent) { + // Retain the incoming event before releasing the tracked one, so that + // re-registering the same event cannot drop its last reference and a + // failing retain() leaves the tracked event untouched. + if (nextExecutionEvent) { + UR_CALL(nextExecutionEvent->retain()); + } + ur_event_handle_t previousExecution = currentExecution; + currentExecution = nextExecutionEvent; + if (previousExecution) { + UR_CALL(previousExecution->release()); + } + return UR_RESULT_SUCCESS; +} +// Drops the reference held on the tracked execution event, if any. +ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { + if (currentExecution) { + currentExecution->release(); + } +} namespace ur::level_zero { ur_result_t diff --git a/unified-runtime/source/adapters/level_zero/v2/command_buffer.hpp b/unified-runtime/source/adapters/level_zero/v2/command_buffer.hpp index b1837d224bffc..9191c342345d6 100644 --- a/unified-runtime/source/adapters/level_zero/v2/command_buffer.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/command_buffer.hpp @@ -23,7 +23,11 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object { v2::raii::command_list_unique_handle &&commandList, const ur_exp_command_buffer_desc_t *desc); - ~ur_exp_command_buffer_handle_t_() = default; + ~ur_exp_command_buffer_handle_t_(); + + ur_event_handle_t getExecutionEventUnlocked(); + ur_result_t + registerExecutionEventUnlocked(ur_event_handle_t nextExecutionEvent); lockable commandListManager; @@ -36,6 +40,8 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object { private: // Indicates if command-buffer was 
finalized. bool isFinalized = false; + + ur_event_handle_t currentExecution = nullptr; }; struct ur_exp_command_buffer_command_handle_t_ : public _ur_object { diff --git a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp index 975a859f29fa0..2962ac7eb2c5e 100644 --- a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp @@ -157,16 +157,20 @@ ur_result_t ur_command_list_manager::appendRegionCopyUnlocked( return UR_RESULT_SUCCESS; } -wait_list_view -ur_command_list_manager::getWaitListView(const ur_event_handle_t *phWaitEvents, - uint32_t numWaitEvents) { +wait_list_view ur_command_list_manager::getWaitListView( + const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents, + ur_event_handle_t additionalWaitEvent) { - waitList.resize(numWaitEvents); + uint32_t totalNumWaitEvents = + numWaitEvents + (additionalWaitEvent != nullptr ? 
1 : 0); + waitList.resize(totalNumWaitEvents); for (uint32_t i = 0; i < numWaitEvents; i++) { waitList[i] = phWaitEvents[i]->getZeEvent(); } - - return {waitList.data(), static_cast(numWaitEvents)}; + if (additionalWaitEvent != nullptr) { + waitList[totalNumWaitEvents - 1] = additionalWaitEvent->getZeEvent(); + } + return {waitList.data(), static_cast(totalNumWaitEvents)}; } ze_event_handle_t diff --git a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp index 24d84335e7210..d97626e0125a8 100644 --- a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp @@ -128,8 +128,9 @@ struct ur_command_list_manager { ze_command_list_handle_t getZeCommandList(); - wait_list_view getWaitListView(const ur_event_handle_t *phWaitEvents, - uint32_t numWaitEvents); + wait_list_view + getWaitListView(const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents, + ur_event_handle_t additionalWaitEvent = nullptr); ze_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent, ur_command_t commandType); diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index 3209ec9b7d121..7646b48bfc6dc 100644 --- a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -23,8 +23,10 @@ namespace v2 { wait_list_view ur_queue_immediate_in_order_t::getWaitListView( locked &commandList, - const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents) { - return commandList->getWaitListView(phWaitEvents, numWaitEvents); + const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents, + ur_event_handle_t additionalWaitEvent) { + return commandList->getWaitListView(phWaitEvents, numWaitEvents, + 
additionalWaitEvent); } static int32_t getZeOrdinal(ur_device_handle_t hDevice) { @@ -898,7 +900,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueTimestampRecordingExp( ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCommandListsExp( uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ur_event_handle_t *phEvent, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_command_t callerCommand) { + const ur_event_handle_t *phEventWaitList, ur_command_t callerCommand, + ur_event_handle_t additionalWaitEvent) { TRACK_SCOPE_LATENCY( "ur_queue_immediate_in_order_t::enqueueGenericCommandListsExp"); @@ -907,7 +910,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCommandListsExp( getSignalEvent(commandListLocked, phEvent, callerCommand); auto [pWaitEvents, numWaitEvents] = - getWaitListView(commandListLocked, phEventWaitList, numEventsInWaitList); + getWaitListView(commandListLocked, phEventWaitList, numEventsInWaitList, + additionalWaitEvent); // zeCommandListImmediateAppendCommandListsExp is not working with in-order // immediate lists what causes problems with synchronization // TODO: remove synchronization when it is not needed @@ -928,9 +932,27 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCommandBufferExp( auto commandListLocked = hCommandBuffer->commandListManager.lock(); ze_command_list_handle_t commandBufferCommandList = commandListLocked->getZeCommandList(); - return enqueueGenericCommandListsExp(1, &commandBufferCommandList, phEvent, - numEventsInWaitList, phEventWaitList, - UR_COMMAND_ENQUEUE_COMMAND_BUFFER_EXP); + // Always request a signal event, even when the caller did not ask for one: + // it is registered below as the event the next execution must wait on. + ur_event_handle_t internalEvent = nullptr; + if (phEvent == nullptr) { + phEvent = &internalEvent; + } + ur_event_handle_t executionEvent = + hCommandBuffer->getExecutionEventUnlocked(); + + ur_result_t result = enqueueGenericCommandListsExp( + 1, &commandBufferCommandList, phEvent, numEventsInWaitList, + phEventWaitList, UR_COMMAND_ENQUEUE_COMMAND_BUFFER_EXP, executionEvent); + 
if (result == UR_RESULT_SUCCESS) { + result = hCommandBuffer->registerExecutionEventUnlocked(*phEvent); + } + // Release the internally requested event on every path, so it is not + // leaked if one of the calls above fails after the event was created. + if (internalEvent != nullptr) { + internalEvent->release(); + } + return result; } ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunchCustomExp( diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp index 3c843a477616b..75fd7aba89b2f 100644 --- a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -37,9 +37,10 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_t_ { std::vector deferredEvents; std::vector submittedKernels; - wait_list_view getWaitListView(locked &commandList, - const ur_event_handle_t *phWaitEvents, - uint32_t numWaitEvents); + wait_list_view + getWaitListView(locked &commandList, + const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents, + ur_event_handle_t additionalWaitEvent = nullptr); ze_event_handle_t getSignalEvent(locked &commandList, ur_event_handle_t *hUserEvent, @@ -56,7 +57,8 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_t_ { ur_result_t enqueueGenericCommandListsExp( uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ur_event_handle_t *phEvent, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_command_t callerCommand); + const ur_event_handle_t *phEventWaitList, ur_command_t callerCommand, + ur_event_handle_t additionalWaitEvent = nullptr); ur_result_t enqueueEventsWaitWithBarrierImpl(uint32_t numEventsInWaitList, diff --git a/unified-runtime/test/conformance/exp_command_buffer/fixtures.h b/unified-runtime/test/conformance/exp_command_buffer/fixtures.h index f5ac7b1ecf4a4..b49c265d89694 100644 --- a/unified-runtime/test/conformance/exp_command_buffer/fixtures.h +++ b/unified-runtime/test/conformance/exp_command_buffer/fixtures.h @@ -45,7 +45,6 @@ 
static void checkCommandBufferUpdateSupport( struct urCommandBufferExpTest : uur::urContextTest { void SetUp() override { - UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{}); UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::SetUp()); @@ -72,7 +71,6 @@ struct urCommandBufferExpTest : uur::urContextTest { template struct urCommandBufferExpTestWithParam : urQueueTestWithParam { void SetUp() override { - UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{}); UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam::SetUp()); @@ -97,7 +95,6 @@ struct urCommandBufferExpTestWithParam : urQueueTestWithParam { struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest { void SetUp() override { - UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{}); UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); diff --git a/unified-runtime/test/conformance/exp_command_buffer/update/invalid_update.cpp b/unified-runtime/test/conformance/exp_command_buffer/update/invalid_update.cpp index 409a2e0601103..fea62b1c57f9d 100644 --- a/unified-runtime/test/conformance/exp_command_buffer/update/invalid_update.cpp +++ b/unified-runtime/test/conformance/exp_command_buffer/update/invalid_update.cpp @@ -309,6 +309,8 @@ TEST_P(InvalidUpdateTest, CommandBufferMismatch) { // that isn't supported. struct InvalidUpdateCommandBufferExpExecutionTest : uur::urKernelExecutionTest { void SetUp() override { + UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{}); + program_name = "fill_usm"; UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp());