@@ -1289,16 +1289,26 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
12891289 }
12901290
12911291 auto Queue = EventList[I]->UrQueue ;
1292- if (Queue) {
1293- // The caller of createAndRetainUrZeEventList must already hold
1294- // a lock of the CurQueue. Additionally lock the Queue if it
1295- // is different from CurQueue.
1296- // TODO: rework this to avoid deadlock when another thread is
1297- // locking the same queues but in a different order.
1298- auto Lock = ((Queue == CurQueue)
1299- ? std::unique_lock<ur_shared_mutex>()
1300- : std::unique_lock<ur_shared_mutex>(Queue->Mutex ));
13011292
1293+ auto CurQueueDevice = CurQueue->Device ;
1294+ std::optional<std::unique_lock<ur_shared_mutex>> QueueLock =
1295+ std::nullopt ;
1296+ // The caller of createAndRetainUrZeEventList must already hold
1297+ // a lock of the CurQueue. However, if the CurQueue is different
1298+ // then the Event's Queue, we need to drop that lock and
1299+ // acquire the Event's Queue lock. This is done to avoid a lock
1300+ // ordering issue.
1301+ // For the rest of this scope, CurQueue cannot be accessed.
1302+ // TODO: This solution is very error-prone. This requires a refactor
1303+ // to either have fine-granularity locks inside of the queues or
1304+ // to move any operations on queues other than CurQueue out
1305+ // of this scope.
1306+ if (Queue && Queue != CurQueue) {
1307+ CurQueue->Mutex .unlock ();
1308+ QueueLock = std::unique_lock<ur_shared_mutex>(Queue->Mutex );
1309+ }
1310+
1311+ if (Queue) {
13021312 // If the event that is going to be waited is in an open batch
13031313 // different from where this next command is going to be added,
13041314 // then we have to force execute of that open command-list
@@ -1341,7 +1351,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
13411351 }
13421352
13431353 ur_command_list_ptr_t CommandList;
1344- if (Queue && Queue->Device != CurQueue-> Device ) {
1354+ if (Queue && Queue->Device != CurQueueDevice ) {
13451355 // Get a command list prior to acquiring an event lock.
13461356 // This prevents a potential deadlock with recursive
13471357 // event locks.
@@ -1351,7 +1361,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
13511361
13521362 std::shared_lock<ur_shared_mutex> Lock (EventList[I]->Mutex );
13531363
1354- if (Queue && Queue->Device != CurQueue-> Device &&
1364+ if (Queue && Queue->Device != CurQueueDevice &&
13551365 !EventList[I]->IsMultiDevice ) {
13561366 ze_event_handle_t MultiDeviceZeEvent = nullptr ;
13571367 ur_event_handle_t MultiDeviceEvent;
@@ -1386,6 +1396,10 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
13861396 this ->UrEventList [TmpListLength]->RefCount .increment ();
13871397 }
13881398
1399+ if (QueueLock.has_value ()) {
1400+ QueueLock.reset ();
1401+ CurQueue->Mutex .lock ();
1402+ }
13891403 TmpListLength += 1 ;
13901404 }
13911405 }
0 commit comments