@@ -5253,27 +5253,6 @@ pi_result piKernelRetain(pi_kernel Kernel) {
52535253pi_result piKernelRelease (pi_kernel Kernel) {
52545254 PI_ASSERT (Kernel, PI_ERROR_INVALID_KERNEL);
52555255
5256- if (IndirectAccessTrackingEnabled) {
5257- // piKernelRelease is called by CleanupCompletedEvent(Event) as soon as
5258- // kernel execution has finished. This is the place where we need to release
5259- // memory allocations. If kernel is not in use (not submitted by some
5260- // other thread) then release referenced memory allocations. As a result,
5261- // memory can be deallocated and context can be removed from container in
5262- // the platform. That's why we need to lock a mutex here.
5263- pi_platform Plt = Kernel->Program ->Context ->getPlatform ();
5264- std::scoped_lock<pi_shared_mutex> ContextsLock (Plt->ContextsMutex );
5265-
5266- if (--Kernel->SubmissionsCount == 0 ) {
5267- // Kernel is not submitted for execution, release referenced memory
5268- // allocations.
5269- for (auto &MemAlloc : Kernel->MemAllocs ) {
5270- USMFreeHelper (MemAlloc->second .Context , MemAlloc->first ,
5271- MemAlloc->second .OwnZeMemHandle );
5272- }
5273- Kernel->MemAllocs .clear ();
5274- }
5275- }
5276-
52775256 if (!Kernel->RefCount .decrementAndTest ())
52785257 return PI_SUCCESS;
52795258
@@ -5821,9 +5800,35 @@ static pi_result CleanupCompletedEvent(pi_event Event, bool QueueLocked) {
58215800 Event->CleanedUp = true ;
58225801 }
58235802
5803+ auto ReleaseIndirectMem = [](pi_kernel Kernel) {
5804+ if (IndirectAccessTrackingEnabled) {
5805+ // Called by CleanupCompletedEvent(Event), right before piKernelRelease,
5806+ // as soon as kernel execution has finished. This is the place where we
5807+ // need to release memory allocations. If the kernel is not in use (not
5808+ // submitted by some other thread), release the referenced memory
5809+ // allocations. As a result, memory can be deallocated and the context can
5810+ // be removed from the platform's container, so we lock the mutex here.
5811+ pi_platform Plt = Kernel->Program ->Context ->getPlatform ();
5812+ std::scoped_lock<pi_shared_mutex> ContextsLock (Plt->ContextsMutex );
5813+
5814+ if (--Kernel->SubmissionsCount == 0 ) {
5815+ // Kernel is not submitted for execution, release referenced memory
5816+ // allocations.
5817+ for (auto &MemAlloc : Kernel->MemAllocs ) {
5818+ // Each element is a std::pair<void *const, MemAllocRecord> *.
5819+ USMFreeHelper (MemAlloc->second .Context , MemAlloc->first ,
5820+ MemAlloc->second .OwnZeMemHandle );
5821+ }
5822+ Kernel->MemAllocs .clear ();
5823+ }
5824+ }
5825+ };
5826+
58245827 // We've reset event data members above, now cleanup resources.
5825- if (AssociatedKernel)
5828+ if (AssociatedKernel) {
5829+ ReleaseIndirectMem (AssociatedKernel);
58265830 PI_CALL (piKernelRelease (AssociatedKernel));
5831+ }
58275832
58285833 if (AssociatedQueue) {
58295834 {
@@ -5877,8 +5882,10 @@ static pi_result CleanupCompletedEvent(pi_event Event, bool QueueLocked) {
58775882 }
58785883 }
58795884 }
5880- if (DepEventKernel)
5881- PI_CALL (piKernelRelease (pi_cast<pi_kernel>(DepEvent->CommandData )));
5885+ if (DepEventKernel) {
5886+ ReleaseIndirectMem (DepEventKernel);
5887+ PI_CALL (piKernelRelease (DepEventKernel));
5888+ }
58825889 PI_CALL (piEventReleaseInternal (DepEvent));
58835890 }
58845891
0 commit comments