@@ -93,16 +93,19 @@ ur_result_t doWait(ur_queue_handle_t hQueue, uint32_t numEventsInWaitList,
9393 OL_RETURN_ON_ERR (makeEvent (TYPE, TargetQueue, hQueue, phEvent));
9494
9595 if constexpr (Barrier) {
96- ol_event_handle_t BarrierEvent;
96+ ur_event_handle_t BarrierEvent;
9797 if (phEvent) {
98- BarrierEvent = (*phEvent)->OffloadEvent ;
98+ BarrierEvent = *phEvent;
99+ urEventRetain (BarrierEvent);
99100 } else {
100- OL_RETURN_ON_ERR (olCreateEvent ( TargetQueue, &BarrierEvent));
101+ OL_RETURN_ON_ERR (makeEvent (TYPE, TargetQueue, hQueue , &BarrierEvent));
101102 }
102103
103104 // Ensure any newly created work waits on this barrier
104105 if (hQueue->Barrier ) {
105- OL_RETURN_ON_ERR (olDestroyEvent (hQueue->Barrier ));
106+ if (auto Err = urEventRelease (hQueue->Barrier )) {
107+ return Err;
108+ }
106109 }
107110 hQueue->Barrier = BarrierEvent;
108111
@@ -114,7 +117,7 @@ ur_result_t doWait(ur_queue_handle_t hQueue, uint32_t numEventsInWaitList,
114117 if (Q == TargetQueue) {
115118 continue ;
116119 }
117- OL_RETURN_ON_ERR (olWaitEvents (Q, &BarrierEvent, 1 ));
120+ OL_RETURN_ON_ERR (olWaitEvents (Q, &BarrierEvent-> OffloadEvent , 1 ));
118121 }
119122 }
120123
@@ -260,6 +263,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite(
260263 blockingWrite, numEventsInWaitList, phEventWaitList, phEvent);
261264}
262265
266+ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy (
267+ ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc,
268+ ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size,
269+ uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
270+ ur_event_handle_t *phEvent) {
271+ char *DevPtrSrc =
272+ reinterpret_cast <char *>(std::get<BufferMem>(hBufferSrc->Mem ).Ptr );
273+ char *DevPtrDst =
274+ reinterpret_cast <char *>(std::get<BufferMem>(hBufferDst->Mem ).Ptr );
275+
276+ return doMemcpy (UR_COMMAND_MEM_BUFFER_COPY, hQueue, DevPtrDst + dstOffset,
277+ hQueue->OffloadDevice , DevPtrSrc + srcOffset,
278+ hQueue->OffloadDevice , size, false , numEventsInWaitList,
279+ phEventWaitList, phEvent);
280+ }
281+
263282UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead (
264283 ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name,
265284 bool blockingRead, size_t count, size_t offset, void *pDst,
@@ -366,3 +385,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap(
366385
367386 return Result;
368387}
388+
389+ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy (
390+ ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc,
391+ size_t size, uint32_t numEventsInWaitList,
392+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
393+ auto GetDevice = [&](const void *Ptr) {
394+ auto Res = hQueue->UrContext ->getAllocType (Ptr);
395+ if (!Res)
396+ return Adapter->HostDevice ;
397+ return Res->Type == OL_ALLOC_TYPE_HOST ? Adapter->HostDevice
398+ : hQueue->OffloadDevice ;
399+ };
400+
401+ return doMemcpy (UR_COMMAND_USM_MEMCPY, hQueue, pDst, GetDevice (pDst), pSrc,
402+ GetDevice (pSrc), size, blocking, numEventsInWaitList,
403+ phEventWaitList, phEvent);
404+
405+ return UR_RESULT_SUCCESS;
406+ }
0 commit comments