@@ -159,6 +159,8 @@ ur_result_t ur_queue_immediate_in_order_t::queueFinish() {
159159 ZE2UR_CALL (zeCommandListHostSynchronize,
160160 (commandListLocked->getZeCommandList (), UINT64_MAX));
161161
162+ hContext->getAsyncPool ()->cleanupPoolsForQueue (this );
163+
162164 // Free deferred kernels
163165 for (auto &hKernel : submittedKernels) {
164166 UR_CALL (hKernel->release ());
@@ -706,31 +708,155 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueWriteHostPipe(
706708 return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
707709}
708710
711+ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMAllocHelper (
712+ ur_usm_pool_handle_t pPool, const size_t size,
713+ const ur_exp_async_usm_alloc_properties_t *, uint32_t numEventsInWaitList,
714+ const ur_event_handle_t *phEventWaitList, void **ppMem,
715+ ur_event_handle_t *phEvent, ur_usm_type_t type) {
716+ auto commandListLocked = commandListManager.lock ();
717+
718+ if (!pPool) {
719+ pPool = hContext->getAsyncPool ();
720+ }
721+
722+ auto device = (type == UR_USM_TYPE_HOST) ? nullptr : hDevice;
723+
724+ ur_event_handle_t originAllocEvent = nullptr ;
725+ auto asyncAlloc = pPool->allocateEnqueued (hContext, this , true , device,
726+ nullptr , type, size);
727+ if (!asyncAlloc) {
728+ auto Ret = pPool->allocate (hContext, device, nullptr , type, size, ppMem);
729+ if (Ret) {
730+ return Ret;
731+ }
732+ } else {
733+ std::tie (*ppMem, originAllocEvent) = *asyncAlloc;
734+ }
735+
736+ auto waitListView = getWaitListView (commandListLocked, phEventWaitList,
737+ numEventsInWaitList, originAllocEvent);
738+
739+ ur_command_t commandType = UR_COMMAND_FORCE_UINT32;
740+ switch (type) {
741+ case UR_USM_TYPE_HOST:
742+ commandType = UR_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP;
743+ break ;
744+ case UR_USM_TYPE_DEVICE:
745+ commandType = UR_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP;
746+ break ;
747+ case UR_USM_TYPE_SHARED:
748+ commandType = UR_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP;
749+ break ;
750+ default :
751+ UR_LOG (ERR, " enqueueUSMAllocHelper: unsupported USM type" );
752+ throw UR_RESULT_ERROR_INVALID_ARGUMENT;
753+ }
754+
755+ auto zeSignalEvent = getSignalEvent (commandListLocked, phEvent, commandType);
756+ auto [pWaitEvents, numWaitEvents] = waitListView;
757+
758+ if (numWaitEvents > 0 ) {
759+ ZE2UR_CALL (
760+ zeCommandListAppendWaitOnEvents,
761+ (commandListLocked->getZeCommandList (), numWaitEvents, pWaitEvents));
762+ }
763+ if (zeSignalEvent) {
764+ ZE2UR_CALL (zeCommandListAppendSignalEvent,
765+ (commandListLocked->getZeCommandList (), zeSignalEvent));
766+ }
767+ if (originAllocEvent) {
768+ originAllocEvent->release ();
769+ }
770+
771+ return UR_RESULT_SUCCESS;
772+ }
773+
709774ur_result_t ur_queue_immediate_in_order_t::enqueueUSMDeviceAllocExp (
710- ur_usm_pool_handle_t , const size_t ,
711- const ur_exp_async_usm_alloc_properties_t *, uint32_t ,
712- const ur_event_handle_t *, void **, ur_event_handle_t *) {
713- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
775+ ur_usm_pool_handle_t pPool, const size_t size,
776+ const ur_exp_async_usm_alloc_properties_t *pProperties,
777+ uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
778+ void **ppMem, ur_event_handle_t *phEvent) {
779+ TRACK_SCOPE_LATENCY (
780+ " ur_queue_immediate_in_order_t::enqueueUSMDeviceAllocExp" );
781+
782+ return enqueueUSMAllocHelper (pPool, size, pProperties, numEventsInWaitList,
783+ phEventWaitList, ppMem, phEvent,
784+ UR_USM_TYPE_DEVICE);
714785}
715786
716787ur_result_t ur_queue_immediate_in_order_t::enqueueUSMSharedAllocExp (
717- ur_usm_pool_handle_t , const size_t ,
718- const ur_exp_async_usm_alloc_properties_t *, uint32_t ,
719- const ur_event_handle_t *, void **, ur_event_handle_t *) {
720- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
788+ ur_usm_pool_handle_t pPool, const size_t size,
789+ const ur_exp_async_usm_alloc_properties_t *pProperties,
790+ uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
791+ void **ppMem, ur_event_handle_t *phEvent) {
792+ TRACK_SCOPE_LATENCY (
793+ " ur_queue_immediate_in_order_t::enqueueUSMSharedAllocExp" );
794+
795+ return enqueueUSMAllocHelper (pPool, size, pProperties, numEventsInWaitList,
796+ phEventWaitList, ppMem, phEvent,
797+ UR_USM_TYPE_SHARED);
721798}
722799
723800ur_result_t ur_queue_immediate_in_order_t::enqueueUSMHostAllocExp (
724- ur_usm_pool_handle_t , const size_t ,
725- const ur_exp_async_usm_alloc_properties_t *, uint32_t ,
726- const ur_event_handle_t *, void **, ur_event_handle_t *) {
727- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
801+ ur_usm_pool_handle_t pPool, const size_t size,
802+ const ur_exp_async_usm_alloc_properties_t *pProperties,
803+ uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
804+ void **ppMem, ur_event_handle_t *phEvent) {
805+ TRACK_SCOPE_LATENCY (" ur_queue_immediate_in_order_t::enqueueUSMHostAllocExp" );
806+
807+ return enqueueUSMAllocHelper (pPool, size, pProperties, numEventsInWaitList,
808+ phEventWaitList, ppMem, phEvent,
809+ UR_USM_TYPE_HOST);
728810}
729811
730812ur_result_t ur_queue_immediate_in_order_t::enqueueUSMFreeExp (
731- ur_usm_pool_handle_t , void *, uint32_t , const ur_event_handle_t *,
732- ur_event_handle_t *) {
733- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
813+ ur_usm_pool_handle_t , void *pMem, uint32_t numEventsInWaitList,
814+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
815+ TRACK_SCOPE_LATENCY (" ur_queue_immediate_in_order_t::enqueueUSMFreeExp" );
816+ auto commandListLocked = commandListManager.lock ();
817+ ur_event_handle_t internalEvent = nullptr ;
818+ if (phEvent == nullptr ) {
819+ phEvent = &internalEvent;
820+ }
821+
822+ auto zeSignalEvent = getSignalEvent (commandListLocked, phEvent,
823+ UR_COMMAND_ENQUEUE_USM_FREE_EXP);
824+ auto [pWaitEvents, numWaitEvents] =
825+ getWaitListView (commandListLocked, phEventWaitList, numEventsInWaitList);
826+
827+ umf_memory_pool_handle_t hPool = umfPoolByPtr (pMem);
828+ if (!hPool) {
829+ return UR_RESULT_ERROR_INVALID_MEM_OBJECT;
830+ }
831+
832+ UsmPool *usmPool = nullptr ;
833+ auto ret = umfPoolGetTag (hPool, (void **)&usmPool);
834+ if (ret != UMF_RESULT_SUCCESS || !usmPool) {
835+ // This should never happen
836+ UR_LOG (ERR, " enqueueUSMFreeExp: invalid pool tag" );
837+ return UR_RESULT_ERROR_UNKNOWN;
838+ }
839+
840+ size_t size = umfPoolMallocUsableSize (hPool, pMem);
841+ if (internalEvent == nullptr ) {
842+ // When the output event is used instead of an internal event, we need to
843+ // increment the refcount.
844+ (*phEvent)->RefCount .increment ();
845+ }
846+
847+ if (numWaitEvents > 0 ) {
848+ ZE2UR_CALL (
849+ zeCommandListAppendWaitOnEvents,
850+ (commandListLocked->getZeCommandList (), numWaitEvents, pWaitEvents));
851+ }
852+
853+ ZE2UR_CALL (zeCommandListAppendSignalEvent,
854+ (commandListLocked->getZeCommandList (), zeSignalEvent));
855+
856+ // Insert must be done after the signal event is appended.
857+ usmPool->asyncPool .insert (pMem, size, *phEvent, this );
858+
859+ return UR_RESULT_SUCCESS;
734860}
735861
736862ur_result_t ur_queue_immediate_in_order_t::bindlessImagesImageCopyExp (
@@ -881,9 +1007,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCommandListsExp(
8811007 " ur_queue_immediate_in_order_t::enqueueGenericCommandListsExp" );
8821008
8831009 auto commandListLocked = commandListManager.lock ();
1010+
8841011 auto zeSignalEvent =
8851012 getSignalEvent (commandListLocked, phEvent, callerCommand);
886-
8871013 auto [pWaitEvents, numWaitEvents] =
8881014 getWaitListView (commandListLocked, phEventWaitList, numEventsInWaitList,
8891015 additionalWaitEvent);
0 commit comments