diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index 9c4fbadc58b6c..250ae8c1de832 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -47,7 +47,9 @@ DeviceGlobalUSMMem &DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM( "USM allocations should not be acquired for device_global with " "device_image_scope property."); const std::shared_ptr &CtxImpl = QueueImpl->getContextImplPtr(); - const device_impl &DevImpl = QueueImpl->getDeviceImpl(); + // DevImpl is not const since alignedAllocInternal may add mark about shared + // USM usage needed for host task handling. + device_impl &DevImpl = QueueImpl->getDeviceImpl(); std::lock_guard Lock(MDeviceToUSMPtrMapMutex); auto DGUSMPtr = MDeviceToUSMPtrMap.find({&DevImpl, CtxImpl.get()}); diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 90185f04f3e64..b6dcaf6f98c8b 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -296,6 +296,9 @@ class device_impl : public std::enable_shared_from_this { /// Get device architecture ext::oneapi::experimental::architecture getDeviceArch() const; + void setUSMAllocationPresent() { MSharedUSMAllocationPresent = true; } + bool isUSMAllocationPresent() const { return MSharedUSMAllocationPresent; } + private: ur_device_handle_t MDevice = 0; ur_device_type_t MType; @@ -307,6 +310,7 @@ class device_impl : public std::enable_shared_from_this { mutable ext::oneapi::experimental::architecture MDeviceArch{}; mutable std::once_flag MDeviceArchFlag; std::pair MDeviceHostBaseTime{0, 0}; + bool MSharedUSMAllocationPresent{}; }; // class device_impl } // namespace detail diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 2a5db00e26217..0653485f38efe 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -159,6 +159,14 @@ event_impl::event_impl(ur_event_handle_t Event, const context &SyclContext) } } +void event_impl::allocateHostProfilingInfo() { + MHostProfilingInfo.reset(new HostProfilingInfo()); + if (!MHostProfilingInfo) + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Out of host memory " + + codeToString(UR_RESULT_ERROR_OUT_OF_HOST_MEMORY)); +} + event_impl::event_impl(const QueueImplPtr &Queue) : MQueue{Queue}, MIsProfilingEnabled{!Queue || Queue->MIsProfilingEnabled}, MFallbackProfiling{MIsProfilingEnabled && Queue && @@ -167,12 +175,7 @@ event_impl::event_impl(const QueueImplPtr &Queue) this->setContextImpl(Queue->getContextImplPtr()); else { MState.store(HES_NotComplete); - MHostProfilingInfo.reset(new HostProfilingInfo()); - if (!MHostProfilingInfo) - throw sycl::exception( - sycl::make_error_code(sycl::errc::runtime), - "Out of host memory " + - codeToString(UR_RESULT_ERROR_OUT_OF_HOST_MEMORY)); + allocateHostProfilingInfo(); return; } MState.store(HES_Complete); @@ -400,7 +403,7 @@ uint64_t event_impl::get_profiling_info() { template <> uint32_t event_impl::get_info() { auto Handle = this->getHandle(); - if (!MIsHostEvent && Handle) { + if (Handle) { return get_event_info(Handle, this->getAdapter()); } @@ -496,18 +499,20 @@ void HostProfilingInfo::start() { StartTime = getTimestamp(); } void HostProfilingInfo::end() { EndTime = getTimestamp(); } ur_native_handle_t event_impl::getNative() { - if (isHost()) + auto Handle = getHandle(); + if (MIsHostEvent && !Handle) return {}; - initContextIfNeeded(); + initContextIfNeeded(); auto Adapter = getAdapter(); - auto Handle = getHandle(); + if (MIsDefaultConstructed && !Handle) { auto TempContext = MContext.get()->getHandleRef(); ur_event_native_properties_t NativeProperties{}; ur_event_handle_t UREvent = nullptr; Adapter->call( 0, TempContext, &NativeProperties, &UREvent); + Adapter->call(UREvent); this->setHandle(UREvent); Handle = UREvent; } @@ -631,8 +636,14 @@ bool event_impl::isCompleted() { void event_impl::setCommand(void *Cmd) { MCommand = Cmd; auto TypedCommand = static_cast(Cmd); - if (TypedCommand) - MIsHostEvent = TypedCommand->getWorkerContext() == nullptr; + if (TypedCommand && TypedCommand->getWorkerContext() == nullptr) + markAsHost(); +} + +void event_impl::markAsHost() { + MIsHostEvent = true; + if (!MHostProfilingInfo) + allocateHostProfilingInfo(); } } // namespace detail diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index b9e75ca0811df..0a5e486af4342 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -352,6 +352,8 @@ class event_impl { return MEvent && MQueue.expired() && !MIsEnqueued && !MCommand; } + void markAsHost(); + protected: // When instrumentation is enabled emits trace event for event wait begin and // returns the telemetry event generated for the wait @@ -361,6 +363,7 @@ class event_impl { void instrumentationEpilog(void *TelementryEvent, const std::string &Name, int32_t StreamID, uint64_t IId) const; void checkProfilingPreconditions() const; + void allocateHostProfilingInfo(); std::atomic MEvent = nullptr; // Stores submission time of command associated with event diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 8230f3a7f4906..028a8a0d42ea6 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -302,7 +302,7 @@ void queue_impl::addEvent(const event &Event) { const EventImplPtr &EImpl = getSyclObjImpl(Event); assert(EImpl && "Event implementation is missing"); auto *Cmd = static_cast(EImpl->getCommand()); - if (Cmd != nullptr && EImpl->getHandle() == nullptr && + if (Cmd != nullptr && (EImpl->getHandle() == nullptr || EImpl->isHost()) && !EImpl->isDiscarded()) { std::weak_ptr EventWeakPtr{EImpl}; std::lock_guard Lock{MMutex}; @@ -646,7 +646,8 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { EventImplWeakPtrIt->lock()) { // A nullptr UR event indicates that urQueueFinish will not cover it, // either because it's a host task event or an unenqueued one. - if (nullptr == EventImplSharedPtr->getHandle()) { + if (nullptr == EventImplSharedPtr->getHandle() || + EventImplSharedPtr->isHost()) { EventImplSharedPtr->wait(EventImplSharedPtr); } } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 0d09d05f15534..9fae1f4de1a64 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -275,7 +275,8 @@ class queue_impl { ur_native_handle_t nativeHandle = 0; getAdapter()->call(MQueue, nullptr, &nativeHandle); - __SYCL_OCL_CALL(clRetainCommandQueue, ur::cast(nativeHandle)); + __SYCL_OCL_CALL(clRetainCommandQueue, + ur::cast(nativeHandle)); return ur::cast(nativeHandle); } @@ -682,6 +683,12 @@ class queue_impl { return ResEvent; } + bool nativeHostTaskHandling() { + return std::getenv("SYCL_ENABLE_USER_EVENTS_PATH") && + !MDevice.isUSMAllocationPresent() && + (MDevice.getBackend() == backend::ext_oneapi_level_zero); + } + #ifndef __INTEL_PREVIEW_BREAKING_CHANGES // CMPLRLLVM-66082 // These methods are for accessing a member that should live in the diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 4e44e9df241ac..559f6c640eacb 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -235,7 +235,7 @@ Command::getUrEvents(const std::vector &EventImpls, // current one is a host task. In this case we should not skip ur event due // to different sync mechanisms for different task types on in-order queue. if (CommandQueue && EventImpl->getWorkerQueue() == CommandQueue && - CommandQueue->isInOrder() && !IsHostTaskCommand) + CommandQueue->isInOrder() && (!IsHostTaskCommand)) continue; RetUrEvents.push_back(Handle); @@ -287,7 +287,7 @@ Command::getUrEventsBlocking(const std::vector &EventImpls, // kept. if (!HasEventMode && MWorkerQueue && EventImpl->getWorkerQueue() == MWorkerQueue && - MWorkerQueue->isInOrder() && !isHostTask()) + MWorkerQueue->isInOrder() && (!isHostTask())) continue; RetUrEvents.push_back(EventImpl->getHandle()); @@ -504,7 +504,7 @@ void Command::waitForEvents(QueueImplPtr Queue, ur_event_handle_t &Event) { #ifndef NDEBUG for (const EventImplPtr &Event : EventImpls) - assert(!Event->isHost() && + assert(Event->getHandle() && "Only non-host events are expected to be waited for here"); #endif if (!EventImpls.empty()) { @@ -761,10 +761,11 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, // 2. Some types of commands do not produce UR events after they are // enqueued (e.g. alloca). Note that we can't check the ur event to make that // distinction since the command might still be unenqueued at this point. - bool PiEventExpected = - (!DepEvent->isHost() && !DepEvent->isDefaultConstructed()); + bool PiEventExpected = !DepEvent->isDefaultConstructed(); if (auto *DepCmd = static_cast(DepEvent->getCommand())) PiEventExpected &= DepCmd->producesPiEvent(); + else + PiEventExpected &= DepEvent->getHandle() != nullptr; if (!PiEventExpected) { // call to waitInternal() is in waitForPreparedHostEvents() as it's called @@ -776,7 +777,7 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, Command *ConnectionCmd = nullptr; ContextImplPtr DepEventContext = DepEvent->getContextImpl(); - // If contexts don't match we'll connect them using host task + // If contexts don't match we'll connect them using host task. if (DepEventContext != WorkerContext && WorkerContext) { Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; ConnectionCmd = GB.connectDepEvent(this, DepEvent, Dep, ToCleanUp); @@ -787,9 +788,9 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, } ContextImplPtr Command::getWorkerContext() const { - if (!MQueue) + if (!MWorkerQueue) return nullptr; - return MQueue->getContextImplPtr(); + return MWorkerQueue->getContextImplPtr(); } bool Command::producesPiEvent() const { return true; } @@ -929,6 +930,7 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, EnqueueResultT(EnqueueResultT::SyclEnqueueFailed, this, Res); else { MEvent->setEnqueued(); + // Host task is protected by MShouldCompleteEventIfPossible = false if (MShouldCompleteEventIfPossible && !MEvent->isDiscarded() && (MEvent->isHost() || MEvent->getHandle() == nullptr)) MEvent->setComplete(); @@ -1061,8 +1063,6 @@ void AllocaCommandBase::emitInstrumentationData() { #endif } -bool AllocaCommandBase::producesPiEvent() const { return false; } - bool AllocaCommandBase::supportsPostEnqueueCleanup() const { return false; } bool AllocaCommandBase::readyForCleanup() const { return false; } @@ -1094,6 +1094,14 @@ void AllocaCommand::emitInstrumentationData() { #endif } +bool AllocaCommand::producesPiEvent() const { + // for reference see enqueueImp() + auto TypedSyclMemObj = static_cast(getSYCLMemObj()); + // Event presence implies interop context esistence + return (TypedSyclMemObj->hasInteropEvent() && + (getContext(MQueue) == TypedSyclMemObj->getInteropContext())); +} + ur_result_t AllocaCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; @@ -1106,7 +1114,8 @@ ur_result_t AllocaCommand::enqueueImp() { if (!MQueue) { // Do not need to make allocation if we have a linked device allocation Command::waitForEvents(MQueue, EventImpls, UREvent); - MEvent->setHandle(UREvent); + assert(UREvent == nullptr && "AllocaCommand: waitForEvents without Queue " + "shouldn't produce native event."); return UR_RESULT_SUCCESS; } @@ -1121,6 +1130,10 @@ ur_result_t AllocaCommand::enqueueImp() { Result != UR_RESULT_SUCCESS) return Result; + assert((!!UREvent == producesPiEvent()) && + "AllocaCommand: native event is expected only when it is for interop " + "memory object with native event provided."); + MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; } @@ -1163,6 +1176,8 @@ AllocaSubBufCommand::AllocaSubBufCommand(QueueImplPtr Queue, Requirement Req, ToEnqueue.push_back(ConnectionCmd); } +bool AllocaSubBufCommand::producesPiEvent() const { return false; } + void AllocaSubBufCommand::emitInstrumentationData() { #ifdef XPTI_ENABLE_INSTRUMENTATION if (!xptiCheckTraceEnabled(MStreamID)) @@ -1202,8 +1217,8 @@ ur_result_t AllocaSubBufCommand::enqueueImp() { MRequirement.MAccessRange, std::move(EventImpls), UREvent); Result != UR_RESULT_SUCCESS) return Result; - - MEvent->setHandle(UREvent); + assert(UREvent == nullptr && + "AllocaSubBufCommand: it shouldn't produce native event."); XPTIRegistry::bufferAssociateNotification(MParentAlloca->getSYCLMemObj(), MMemAllocation); @@ -1314,6 +1329,7 @@ ur_result_t ReleaseCommand::enqueueImp() { EventImpls.push_back(UnmapEventImpl); } ur_event_handle_t UREvent = nullptr; + // Synchronous wait if (SkipRelease) Command::waitForEvents(MQueue, EventImpls, UREvent); else { @@ -1324,7 +1340,8 @@ ur_result_t ReleaseCommand::enqueueImp() { Result != UR_RESULT_SUCCESS) return Result; } - MEvent->setHandle(UREvent); + assert(!UREvent && "ReleaseCommand: release shouldn't produce native event."); + return UR_RESULT_SUCCESS; } @@ -1394,6 +1411,7 @@ ur_result_t MapMemObject::enqueueImp() { MSrcReq.MElemSize, std::move(RawEvents), UREvent); Result != UR_RESULT_SUCCESS) return Result; + assert(UREvent && "MapMemObject command must produce native event"); MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; @@ -1442,7 +1460,8 @@ void UnMapMemObject::emitInstrumentationData() { #endif } -bool UnMapMemObject::producesPiEvent() const { +static bool checkNativeEventForWA(const QueueImplPtr &Queue, + const ur_event_handle_t &NativeEvent) { // TODO remove this workaround once the batching issue is addressed in Level // Zero adapter. // Consider the following scenario on Level Zero: @@ -1458,9 +1477,13 @@ bool UnMapMemObject::producesPiEvent() const { // an event waitlist and Level Zero adapter attempts to batch these commands, // so the execution of kernel B starts only on step 4. This workaround // restores the old behavior in this case until this is resolved. - return MQueue && (MQueue->getDeviceImpl().getBackend() != - backend::ext_oneapi_level_zero || - MEvent->getHandle() != nullptr); + return Queue && (Queue->getDeviceImpl().getBackend() != + backend::ext_oneapi_level_zero || + NativeEvent != nullptr); +} + +bool UnMapMemObject::producesPiEvent() const { + return checkNativeEventForWA(MQueue, MEvent->getHandle()); } ur_result_t UnMapMemObject::enqueueImp() { @@ -1477,6 +1500,8 @@ ur_result_t UnMapMemObject::enqueueImp() { Result != UR_RESULT_SUCCESS) return Result; + assert((!!UREvent == checkNativeEventForWA(MQueue, UREvent)) && + "UnMapMemObject command must produce native event"); MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; @@ -1542,32 +1567,9 @@ void MemCpyCommand::emitInstrumentationData() { #endif } -ContextImplPtr MemCpyCommand::getWorkerContext() const { - if (!MWorkerQueue) - return nullptr; - return MWorkerQueue->getContextImplPtr(); -} - bool MemCpyCommand::producesPiEvent() const { - // TODO remove this workaround once the batching issue is addressed in Level - // Zero adapter. - // Consider the following scenario on Level Zero: - // 1. Kernel A, which uses buffer A, is submitted to queue A. - // 2. Kernel B, which uses buffer B, is submitted to queue B. - // 3. queueA.wait(). - // 4. queueB.wait(). - // DPCPP runtime used to treat unmap/write commands for buffer A/B as host - // dependencies (i.e. they were waited for prior to enqueueing any command - // that's dependent on them). This allowed Level Zero adapter to detect that - // each queue is idle on steps 1/2 and submit the command list right away. - // This is no longer the case since we started passing these dependencies in - // an event waitlist and Level Zero adapter attempts to batch these commands, - // so the execution of kernel B starts only on step 4. This workaround - // restores the old behavior in this case until this is resolved. - return !MQueue || - MQueue->getDeviceImpl().getBackend() != - backend::ext_oneapi_level_zero || - MEvent->getHandle() != nullptr; + return checkNativeEventForWA(MSrcQueue ? MSrcQueue : MQueue, + MEvent->getHandle()); } ur_result_t MemCpyCommand::enqueueImp() { @@ -1589,6 +1591,9 @@ ur_result_t MemCpyCommand::enqueueImp() { MEvent); Result != UR_RESULT_SUCCESS) return Result; + assert((!!UREvent == + checkNativeEventForWA(MSrcQueue ? MSrcQueue : MQueue, UREvent)) && + "MemCpyCommand must produce native event"); MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; @@ -1641,7 +1646,8 @@ ur_result_t UpdateHostRequirementCommand::enqueueImp() { std::vector EventImpls = MPreparedDepsEvents; ur_event_handle_t UREvent = nullptr; Command::waitForEvents(MQueue, EventImpls, UREvent); - MEvent->setHandle(UREvent); + assert((!!UREvent == producesPiEvent()) && + "UpdateHostRequirementCommand doesn't produce native event"); assert(MSrcAllocaCmd && "Expected valid alloca command"); assert(MSrcAllocaCmd->getMemAllocation() && "Expected valid source pointer"); @@ -1674,25 +1680,17 @@ void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const { } } -MemCpyCommandHost::MemCpyCommandHost(Requirement SrcReq, - AllocaCommandBase *SrcAllocaCmd, - Requirement DstReq, void **DstPtr, - QueueImplPtr SrcQueue, - QueueImplPtr DstQueue) - : Command(CommandType::COPY_MEMORY, std::move(DstQueue)), - MSrcQueue(SrcQueue), MSrcReq(std::move(SrcReq)), - MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(DstReq)), MDstPtr(DstPtr) { - if (MSrcQueue) { - MEvent->setContextImpl(MSrcQueue->getContextImplPtr()); - } - - MWorkerQueue = !MQueue ? MSrcQueue : MQueue; - MEvent->setWorkerQueue(MWorkerQueue); - +MemCpyToHostCommand::MemCpyToHostCommand(Requirement SrcReq, + AllocaCommandBase *SrcAllocaCmd, + Requirement DstReq, void **DstPtr, + QueueImplPtr SrcQueue) + : Command(CommandType::COPY_MEMORY, std::move(SrcQueue)), + MSrcReq(std::move(SrcReq)), MSrcAllocaCmd(SrcAllocaCmd), + MDstReq(std::move(DstReq)), MDstPtr(DstPtr) { emitInstrumentationDataProxy(); } -void MemCpyCommandHost::emitInstrumentationData() { +void MemCpyToHostCommand::emitInstrumentationData() { #ifdef XPTI_ENABLE_INSTRUMENTATION if (!xptiCheckTraceEnabled(MStreamID)) return; @@ -1706,9 +1704,8 @@ void MemCpyCommandHost::emitInstrumentationData() { xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); xpti::addMetadata(CmdTraceEvent, "copy_from", - MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); - xpti::addMetadata(CmdTraceEvent, "copy_to", MQueue ? deviceToID(MQueue->get_device()) : 0); + xpti::addMetadata(CmdTraceEvent, "copy_to", 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); @@ -1716,23 +1713,17 @@ void MemCpyCommandHost::emitInstrumentationData() { #endif } -ContextImplPtr MemCpyCommandHost::getWorkerContext() const { - if (!MWorkerQueue) - return nullptr; - return MWorkerQueue->getContextImplPtr(); -} - -ur_result_t MemCpyCommandHost::enqueueImp() { +ur_result_t MemCpyToHostCommand::enqueueImp() { const QueueImplPtr &Queue = MWorkerQueue; waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getUrEvents(EventImpls); - ur_event_handle_t UREvent = nullptr; // Omit copying if mode is discard one. // TODO: Handle this at the graph building time by, for example, creating // empty node instead of memcpy. - if (MDstReq.MAccessMode == access::mode::discard_read_write || + if (ur_event_handle_t UREvent = nullptr; + MDstReq.MAccessMode == access::mode::discard_read_write || MDstReq.MAccessMode == access::mode::discard_write) { Command::waitForEvents(Queue, EventImpls, UREvent); @@ -1740,17 +1731,18 @@ ur_result_t MemCpyCommandHost::enqueueImp() { } flushCrossQueueDeps(EventImpls, MWorkerQueue); - + ur_event_handle_t UREvent = nullptr; if (auto Result = callMemOpHelper( MemoryManager::copy, MSrcAllocaCmd->getSYCLMemObj(), - MSrcAllocaCmd->getMemAllocation(), MSrcQueue, MSrcReq.MDims, + MSrcAllocaCmd->getMemAllocation(), MQueue, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, MSrcReq.MOffset, - MSrcReq.MElemSize, *MDstPtr, MQueue, MDstReq.MDims, + MSrcReq.MElemSize, *MDstPtr, nullptr, MDstReq.MDims, MDstReq.MMemoryRange, MDstReq.MAccessRange, MDstReq.MOffset, MDstReq.MElemSize, std::move(RawEvents), UREvent, MEvent); Result != UR_RESULT_SUCCESS) return Result; - + assert((!!UREvent == producesPiEvent()) && + "MemCpyCommandHost must produce native event"); MEvent->setHandle(UREvent); return UR_RESULT_SUCCESS; } @@ -1763,7 +1755,8 @@ ur_result_t EmptyCommand::enqueueImp() { waitForPreparedHostEvents(); ur_event_handle_t UREvent = nullptr; waitForEvents(MQueue, MPreparedDepsEvents, UREvent); - MEvent->setHandle(UREvent); + assert((!!UREvent == producesPiEvent()) && + "EmptyCommand doesn't produce native event"); return UR_RESULT_SUCCESS; } @@ -1828,7 +1821,7 @@ void EmptyCommand::printDot(std::ostream &Stream) const { bool EmptyCommand::producesPiEvent() const { return false; } -void MemCpyCommandHost::printDot(std::ostream &Stream) const { +void MemCpyToHostCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\""; Stream << "ID = " << this << "\\n"; @@ -1846,9 +1839,8 @@ void MemCpyCommandHost::printDot(std::ostream &Stream) const { } UpdateHostRequirementCommand::UpdateHostRequirementCommand( - QueueImplPtr Queue, Requirement Req, AllocaCommandBase *SrcAllocaCmd, - void **DstPtr) - : Command(CommandType::UPDATE_REQUIREMENT, std::move(Queue)), + Requirement Req, AllocaCommandBase *SrcAllocaCmd, void **DstPtr) + : Command(CommandType::UPDATE_REQUIREMENT, nullptr), MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(Req)), MDstPtr(DstPtr) { emitInstrumentationDataProxy(); @@ -1953,7 +1945,11 @@ ExecCGCommand::ExecCGCommand( if (MCommandGroup->getType() == detail::CGType::CodeplayHostTask) { MEvent->setSubmittedQueue( static_cast(MCommandGroup.get())->MQueue); + MWorkerQueue = nullptr; + MEvent->setWorkerQueue(MWorkerQueue); + MEvent->markAsHost(); } + if (MCommandGroup->getType() == detail::CGType::ProfilingTag) MEvent->markAsProfilingTagEvent(); @@ -3112,6 +3108,8 @@ ur_result_t ExecCGCommand::enqueueImp() { } else { return enqueueImpQueue(); } + assert((!!MEvent->getHandle() == producesPiEvent()) && + "ExecCGCommand must produce native event"); } ur_result_t ExecCGCommand::enqueueImpQueue() { @@ -3406,7 +3404,14 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { // Host task is executed asynchronously so we should record where it was // submitted to report exception origin properly. copySubmissionCodeLocation(); - + if (producesPiEvent()) { + auto TempContext = getContext(HostTask->MQueue)->getHandleRef(); + ur_event_native_properties_t NativeProperties{}; + auto &Adapter = MQueue->getAdapter(); + Adapter->call( + 0, TempContext, &NativeProperties, &UREvent); + MEvent->setHandle(UREvent); + } queue_impl::getThreadPool().submit( DispatchHostTask(this, std::move(ReqToMem), std::move(ReqUrMem))); @@ -3784,7 +3789,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { bool ExecCGCommand::producesPiEvent() const { return !MCommandBuffer && - MCommandGroup->getType() != CGType::CodeplayHostTask; + !( + MCommandGroup->getType() == CGType::CodeplayHostTask && !MQueue /* MQueue is set only when we have native sync with host task */); } bool ExecCGCommand::supportsPostEnqueueCleanup() const { @@ -3807,11 +3813,17 @@ UpdateCommandBufferCommand::UpdateCommandBufferCommand( : Command(CommandType::UPDATE_CMD_BUFFER, Queue), MGraph(Graph), MNodes(Nodes) {} +bool UpdateCommandBufferCommand::producesPiEvent() const { + return !MPreparedDepsEvents.empty(); +} + ur_result_t UpdateCommandBufferCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; ur_event_handle_t UREvent = nullptr; Command::waitForEvents(MQueue, EventImpls, UREvent); + assert((!!UREvent == producesPiEvent()) && + "UpdateCommandBufferCommand produces native event"); MEvent->setHandle(UREvent); auto CheckAndFindAlloca = [](Requirement *Req, const DepDesc &Dep) { @@ -3881,7 +3893,6 @@ void UpdateCommandBufferCommand::printDot(std::ostream &Stream) const { } void UpdateCommandBufferCommand::emitInstrumentationData() {} -bool UpdateCommandBufferCommand::producesPiEvent() const { return false; } } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index d6f439f536776..dfcb9012340cc 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -165,7 +165,9 @@ class Command { } // Shows that command could be enqueued, but is blocking enqueue of all // commands depending on it. Regular usage - host task. - bool isBlocking() const { return isHostTask() && !MEvent->isCompleted(); } + bool isBlocking() const { + return isHostTask() && !producesPiEvent() && !MEvent->isCompleted(); + } void addBlockedUserUnique(const EventImplPtr &NewUser) { if (std::find(MBlockedUsers.begin(), MBlockedUsers.end(), NewUser) != @@ -222,7 +224,8 @@ class Command { /// Get the context of the queue this command will be submitted to. Could /// differ from the context of MQueue for memory copy commands. - virtual ContextImplPtr getWorkerContext() const; + ContextImplPtr getWorkerContext() const; + QueueImplPtr getWorkerQueue() const { return MWorkerQueue; } /// Returns true iff the command produces a UR event on non-host devices. virtual bool producesPiEvent() const; @@ -262,6 +265,7 @@ class Command { std::vector &MPreparedDepsEvents; std::vector &MPreparedHostDepsEvents; + // Event is nullptr when Queue == nullptr too void waitForEvents(QueueImplPtr Queue, std::vector &RawEvents, ur_event_handle_t &Event); @@ -457,8 +461,6 @@ class AllocaCommandBase : public Command { void emitInstrumentationData() override; - bool producesPiEvent() const final; - bool supportsPostEnqueueCleanup() const final; bool readyForCleanup() const final; @@ -497,6 +499,7 @@ class AllocaCommand : public AllocaCommandBase { void *getMemAllocation() const final { return MMemAllocation; } void printDot(std::ostream &Stream) const final; void emitInstrumentationData() override; + bool producesPiEvent() const final; private: ur_result_t enqueueImp() final; @@ -518,6 +521,7 @@ class AllocaSubBufCommand : public AllocaCommandBase { void printDot(std::ostream &Stream) const final; AllocaCommandBase *getParentAlloca() { return MParentAlloca; } void emitInstrumentationData() override; + bool producesPiEvent() const final; private: ur_result_t enqueueImp() final; @@ -574,7 +578,6 @@ class MemCpyCommand : public Command { void printDot(std::ostream &Stream) const final; const Requirement *getRequirement() const final { return &MDstReq; } void emitInstrumentationData() final; - ContextImplPtr getWorkerContext() const final; bool producesPiEvent() const final; private: @@ -589,21 +592,19 @@ class MemCpyCommand : public Command { /// The mem copy host command enqueues memory copy between two instances of /// memory object. -class MemCpyCommandHost : public Command { +class MemCpyToHostCommand : public Command { public: - MemCpyCommandHost(Requirement SrcReq, AllocaCommandBase *SrcAllocaCmd, - Requirement DstReq, void **DstPtr, QueueImplPtr SrcQueue, - QueueImplPtr DstQueue); + MemCpyToHostCommand(Requirement SrcReq, AllocaCommandBase *SrcAllocaCmd, + Requirement DstReq, void **DstPtr, QueueImplPtr SrcQueue); void printDot(std::ostream &Stream) const final; const Requirement *getRequirement() const final { return &MDstReq; } void emitInstrumentationData() final; - ContextImplPtr getWorkerContext() const final; + bool producesPiEvent() const final { return !!MQueue; } private: ur_result_t enqueueImp() final; - QueueImplPtr MSrcQueue; Requirement MSrcReq; AllocaCommandBase *MSrcAllocaCmd = nullptr; Requirement MDstReq; @@ -691,12 +692,13 @@ std::pair emitKernelInstrumentationData( class UpdateHostRequirementCommand : public Command { public: - UpdateHostRequirementCommand(QueueImplPtr Queue, Requirement Req, - AllocaCommandBase *SrcAllocaCmd, void **DstPtr); + UpdateHostRequirementCommand(Requirement Req, AllocaCommandBase *SrcAllocaCmd, + void **DstPtr); void printDot(std::ostream &Stream) const final; const Requirement *getRequirement() const final { return &MDstReq; } void emitInstrumentationData() final; + bool producesPiEvent() const final { return false; } private: ur_result_t enqueueImp() final; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index d4a2a3cef1251..dec017286f1f6 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -270,18 +270,16 @@ void Scheduler::GraphBuilder::addNodeToLeaves( } UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( - MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue, - std::vector &ToEnqueue) { - auto Context = queue_impl::getContext(Queue); - AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, Context); + MemObjRecord *Record, Requirement *Req, std::vector &ToEnqueue) { + AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, nullptr); assert(AllocaCmd && "There must be alloca for requirement!"); UpdateHostRequirementCommand *UpdateCommand = - new UpdateHostRequirementCommand(Queue, *Req, AllocaCmd, &Req->MData); + new UpdateHostRequirementCommand(*Req, AllocaCmd, &Req->MData); // Need copy of requirement because after host accessor destructor call // dependencies become invalid if requirement is stored by pointer. const Requirement *StoredReq = UpdateCommand->getRequirement(); - std::set Deps = findDepsForReq(Record, Req, Context); + std::set Deps = findDepsForReq(Record, Req, nullptr); std::vector ToCleanUp; for (Command *Dep : Deps) { Command *ConnCmd = @@ -306,10 +304,10 @@ static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc, assert(AllocaCmdSrc->MIsActive && "Expected source alloca command to be active"); - if (!AllocaCmdSrc->getQueue()) { + if (!AllocaCmdSrc->getWorkerQueue()) { UnMapMemObject *UnMapCmd = new UnMapMemObject( AllocaCmdDst, *AllocaCmdDst->getRequirement(), - &AllocaCmdSrc->MMemAllocation, AllocaCmdDst->getQueue()); + &AllocaCmdSrc->MMemAllocation, AllocaCmdDst->getWorkerQueue()); std::swap(AllocaCmdSrc->MIsActive, AllocaCmdDst->MIsActive); @@ -318,7 +316,7 @@ static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc, MapMemObject *MapCmd = new MapMemObject( AllocaCmdSrc, *AllocaCmdSrc->getRequirement(), - &AllocaCmdDst->MMemAllocation, AllocaCmdSrc->getQueue(), MapMode); + &AllocaCmdDst->MMemAllocation, AllocaCmdSrc->getWorkerQueue(), MapMode); std::swap(AllocaCmdSrc->MIsActive, AllocaCmdDst->MIsActive); @@ -351,9 +349,10 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( // current context, need to find a parent alloca command for it (it must be // there) auto IsSuitableAlloca = [Record](AllocaCommandBase *AllocaCmd) { - bool Res = isOnSameContext(Record->MCurContext, AllocaCmd->getQueue()) && - // Looking for a parent buffer alloca command - AllocaCmd->getType() == Command::CommandType::ALLOCA; + bool Res = + isOnSameContext(Record->MCurContext, AllocaCmd->getWorkerQueue()) && + // Looking for a parent buffer alloca command + AllocaCmd->getType() == Command::CommandType::ALLOCA; return Res; }; const auto It = @@ -391,10 +390,10 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( } else { // Full copy of buffer is needed to avoid loss of data that may be caused // by copying specific range from host to device and backwards. - NewCmd = - new MemCpyCommand(*AllocaCmdSrc->getRequirement(), AllocaCmdSrc, - *AllocaCmdDst->getRequirement(), AllocaCmdDst, - AllocaCmdSrc->getQueue(), AllocaCmdDst->getQueue()); + NewCmd = new MemCpyCommand(*AllocaCmdSrc->getRequirement(), AllocaCmdSrc, + *AllocaCmdDst->getRequirement(), AllocaCmdDst, + AllocaCmdSrc->getWorkerQueue(), + AllocaCmdDst->getWorkerQueue()); } } std::vector ToCleanUp; @@ -415,7 +414,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( Command *Scheduler::GraphBuilder::remapMemoryObject( MemObjRecord *Record, Requirement *Req, AllocaCommandBase *HostAllocaCmd, std::vector &ToEnqueue) { - assert(!HostAllocaCmd->getQueue() && "Host alloca command expected"); + assert(!HostAllocaCmd->getWorkerQueue() && "Host alloca command expected"); assert(HostAllocaCmd->MIsActive && "Active alloca command expected"); AllocaCommandBase *LinkedAllocaCmd = HostAllocaCmd->MLinkedAllocaCmd; @@ -425,15 +424,16 @@ Command *Scheduler::GraphBuilder::remapMemoryObject( UnMapMemObject *UnMapCmd = new UnMapMemObject( LinkedAllocaCmd, *LinkedAllocaCmd->getRequirement(), - &HostAllocaCmd->MMemAllocation, LinkedAllocaCmd->getQueue()); + &HostAllocaCmd->MMemAllocation, LinkedAllocaCmd->getWorkerQueue()); // Map write only as read-write access::mode MapMode = Req->MAccessMode; if (MapMode == access::mode::write) MapMode = access::mode::read_write; - MapMemObject *MapCmd = new MapMemObject( - LinkedAllocaCmd, *LinkedAllocaCmd->getRequirement(), - &HostAllocaCmd->MMemAllocation, LinkedAllocaCmd->getQueue(), MapMode); + MapMemObject *MapCmd = + new MapMemObject(LinkedAllocaCmd, *LinkedAllocaCmd->getRequirement(), + &HostAllocaCmd->MMemAllocation, + LinkedAllocaCmd->getWorkerQueue(), MapMode); std::vector ToCleanUp; for (Command *Dep : Deps) { @@ -474,15 +474,15 @@ Scheduler::GraphBuilder::addCopyBack(Requirement *Req, AllocaCommandBase *SrcAllocaCmd = findAllocaForReq(Record, Req, Record->MCurContext); - auto MemCpyCmdUniquePtr = std::make_unique( + auto MemCpyCmdUniquePtr = std::make_unique( *SrcAllocaCmd->getRequirement(), SrcAllocaCmd, *Req, &Req->MData, - SrcAllocaCmd->getQueue(), nullptr); + SrcAllocaCmd->getWorkerQueue()); if (!MemCpyCmdUniquePtr) throw exception(make_error_code(errc::memory_allocation), "Out of host memory"); - MemCpyCommandHost *MemCpyCmd = MemCpyCmdUniquePtr.release(); + MemCpyToHostCommand *MemCpyCmd = MemCpyCmdUniquePtr.release(); std::vector ToCleanUp; for (Command *Dep : Deps) { @@ -524,7 +524,7 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, AllocaCommandBase *HostAllocaCmd = getOrCreateAllocaForReq(Record, Req, nullptr, ToEnqueue); - if (isOnSameContext(Record->MCurContext, HostAllocaCmd->getQueue())) { + if (isOnSameContext(Record->MCurContext, HostAllocaCmd->getWorkerQueue())) { if (!isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess)) { remapMemoryObject(Record, Req, Req->MIsSubBuffer ? (static_cast( @@ -536,8 +536,7 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, } else insertMemoryMove(Record, Req, nullptr, ToEnqueue); - Command *UpdateHostAccCmd = - insertUpdateHostReqCmd(Record, Req, nullptr, ToEnqueue); + Command *UpdateHostAccCmd = insertUpdateHostReqCmd(Record, Req, ToEnqueue); // Need empty command to be blocked until host accessor is destructed EmptyCommand *EmptyCmd = addEmptyCmd( @@ -606,7 +605,7 @@ Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, // Going through copying memory between contexts is not supported. if (Dep.MDepCommand) { - auto DepQueue = Dep.MDepCommand->getQueue(); + auto DepQueue = Dep.MDepCommand->getWorkerQueue(); CanBypassDep &= isOnSameContext(Context, DepQueue); } @@ -647,7 +646,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq( bool AllowConst) { auto IsSuitableAlloca = [&Context, Req, AllowConst](AllocaCommandBase *AllocaCmd) { - bool Res = isOnSameContext(Context, AllocaCmd->getQueue()); + bool Res = isOnSameContext(Context, AllocaCmd->getWorkerQueue()); if (IsSuitableSubReq(Req)) { const Requirement *TmpReq = AllocaCmd->getRequirement(); Res &= AllocaCmd->getType() == Command::CommandType::ALLOCA_SUB_BUF; @@ -921,20 +920,20 @@ static void combineAccessModesOfReqs(std::vector &Reqs) { } Command *Scheduler::GraphBuilder::addCG( - std::unique_ptr CommandGroup, const QueueImplPtr &Queue, - std::vector &ToEnqueue, bool EventNeeded, - ur_exp_command_buffer_handle_t CommandBuffer, + std::unique_ptr CommandGroup, + const QueueImplPtr &SubmittedQueue, std::vector &ToEnqueue, + bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer, const std::vector &Dependencies) { std::vector &Reqs = CommandGroup->getRequirements(); std::vector &Events = CommandGroup->getEvents(); - auto NewCmd = std::make_unique(std::move(CommandGroup), Queue, - EventNeeded, CommandBuffer, - std::move(Dependencies)); - + auto NewCmd = std::make_unique( + std::move(CommandGroup), SubmittedQueue, EventNeeded, CommandBuffer, + std::move(Dependencies)); if (!NewCmd) throw exception(make_error_code(errc::memory_allocation), "Out of host memory"); + auto Queue = NewCmd->getWorkerQueue(); bool isInteropTask = isInteropHostTask(NewCmd.get()); @@ -1225,7 +1224,7 @@ Command *Scheduler::GraphBuilder::connectDepEvent( try { std::shared_ptr HT(new detail::HostTask); std::unique_ptr ConnectCG(new detail::CGHostTask( - std::move(HT), /* Queue = */ Cmd->getQueue(), /* Context = */ {}, + std::move(HT), /* Queue = */ Cmd->getWorkerQueue(), /* Context = */ {}, /* Args = */ {}, detail::CG::StorageInitHelper( /* ArgsStorage = */ {}, /* AccStorage = */ {}, diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 17b972e50da30..17ac93511a2eb 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -121,8 +121,10 @@ EventImplPtr Scheduler::addCG( MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), AuxiliaryCmds); break; case CGType::CodeplayHostTask: { - NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), nullptr, - AuxiliaryCmds, EventNeeded); + NewCmd = + MGraphBuilder.addCG(std::move(CommandGroup), + Queue->nativeHostTaskHandling() ? Queue : nullptr, + AuxiliaryCmds, EventNeeded); break; } default: @@ -495,7 +497,10 @@ void Scheduler::NotifyHostTaskCompletion(Command *Cmd) { // update self-event status CmdEvent->setComplete(); } - Scheduler::enqueueUnblockedCommands(Cmd->MBlockedUsers, Lock, ToCleanUp); + if (auto NativeEvent = CmdEvent->getHandle()) { + QueueImpl->getAdapter()->call(NativeEvent); + } else + Scheduler::enqueueUnblockedCommands(Cmd->MBlockedUsers, Lock, ToCleanUp); } QueueImpl->revisitUnenqueuedCommandsState(CmdEvent); diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 5b657c1f13b93..d63781509e2f2 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -678,7 +678,6 @@ class Scheduler { UpdateHostRequirementCommand * insertUpdateHostReqCmd(MemObjRecord *Record, Requirement *Req, - const QueueImplPtr &Queue, std::vector &ToEnqueue); /// Finds dependencies for the requirement. diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index f289c1e62b371..3381261a5ab5a 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -246,6 +246,10 @@ void SYCLMemObjT::handleWriteAccessorCreation() { } } +bool SYCLMemObjT::hasInteropEvent() const { + return MInteropEvent && MInteropEvent->getHandle(); +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp b/sycl/source/detail/sycl_mem_obj_t.hpp index cd3a717fdb8cf..4003aacd198f4 100644 --- a/sycl/source/detail/sycl_mem_obj_t.hpp +++ b/sycl/source/detail/sycl_mem_obj_t.hpp @@ -159,6 +159,8 @@ class SYCLMemObjT : public SYCLMemObjI { MHostPtrProvided = true; } + bool hasInteropEvent() const; + protected: void updateHostMemory(void *const Ptr); diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index 1ea02f73b3846..c22fd763de6fc 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -110,10 +110,10 @@ extern xpti::trace_event_data_t *GSYCLGraphEvent; #endif namespace usm { +// Device ptr is not const to mark USM shared presence if needed. void *alignedAllocInternal(size_t Alignment, size_t Size, - const context_impl *CtxImpl, - const device_impl *DevImpl, alloc Kind, - const property_list &PropList) { + const context_impl *CtxImpl, device_impl *DevImpl, + alloc Kind, const property_list &PropList) { if (Kind == alloc::device && !DevImpl->has(sycl::aspect::usm_device_allocations)) { throw sycl::exception(sycl::errc::feature_not_supported, @@ -195,6 +195,8 @@ void *alignedAllocInternal(size_t Alignment, size_t Size, Error = Adapter->call_nocheck( C, Dev, &UsmDesc, /*pool=*/nullptr, Size, &RetVal); + if (Error == UR_RESULT_SUCCESS) + DevImpl->setUSMAllocationPresent(); break; } diff --git a/sycl/source/detail/usm/usm_impl.hpp b/sycl/source/detail/usm/usm_impl.hpp index 77ab25b0ee3e7..a650365f73894 100644 --- a/sycl/source/detail/usm/usm_impl.hpp +++ b/sycl/source/detail/usm/usm_impl.hpp @@ -16,8 +16,8 @@ namespace detail { namespace usm { void *alignedAllocInternal(size_t Alignment, size_t Size, - const context_impl *CtxImpl, - const device_impl *DevImpl, sycl::usm::alloc Kind, + const context_impl *CtxImpl, device_impl *DevImpl, + sycl::usm::alloc Kind, const property_list &PropList = {}); void freeInternal(void *Ptr, const context_impl *CtxImpl); diff --git a/sycl/test-e2e/Basic/empty_command.cpp b/sycl/test-e2e/Basic/empty_command.cpp index dac5865ae8d72..4d155cc071b9b 100644 --- a/sycl/test-e2e/Basic/empty_command.cpp +++ b/sycl/test-e2e/Basic/empty_command.cpp @@ -4,9 +4,11 @@ #include #include +#include #include #include +using namespace std::chrono_literals; using namespace sycl; void test_host_task_dep() { @@ -50,6 +52,10 @@ void test_device_event_dep() { start_execution.count_down(); empty_cg_event.wait(); + std::cout << "p = " << *p << std::endl; + std::this_thread::sleep_for(200ms); + std::cout << "p2 = " << *p << std::endl; + assert(*p == 42); sycl::free(p, q); diff --git a/sycl/test-e2e/format.py b/sycl/test-e2e/format.py index 116579c21f825..a36284d60f237 100644 --- a/sycl/test-e2e/format.py +++ b/sycl/test-e2e/format.py @@ -332,7 +332,7 @@ def get_extra_env(sycl_devices): dev_features = test.config.sycl_dev_features[full_dev_name] if "level_zero_v2_adapter" in dev_features: - expanded += " env UR_LOADER_USE_LEVEL_ZERO_V2=1" + expanded += " env UR_LOADER_USE_LEVEL_ZERO_V2=1 SYCL_ENABLE_USER_EVENTS_PATH=1" expanded += " ONEAPI_DEVICE_SELECTOR={} {}".format( parsed_dev_name, test.config.run_launcher diff --git a/sycl/unittests/queue/ShortcutFunctions.cpp b/sycl/unittests/queue/ShortcutFunctions.cpp index f6b8dd99dd2f5..c573bc069ff22 100644 --- a/sycl/unittests/queue/ShortcutFunctions.cpp +++ b/sycl/unittests/queue/ShortcutFunctions.cpp @@ -34,8 +34,10 @@ ur_result_t redefinedEnqueueMemBufferWrite(void *) { return UR_RESULT_SUCCESS; } -ur_result_t redefinedEnqueueMemBufferRead(void *) { +ur_result_t redefinedEnqueueMemBufferRead(void *pParams) { TestContext->BufferReadCalled = true; + auto params = *static_cast(pParams); + **params.pphEvent = mock::createDummyHandle(); return UR_RESULT_SUCCESS; } diff --git a/sycl/unittests/scheduler/CMakeLists.txt b/sycl/unittests/scheduler/CMakeLists.txt index afc0e185eb7c0..caf94a61a1b1e 100644 --- a/sycl/unittests/scheduler/CMakeLists.txt +++ b/sycl/unittests/scheduler/CMakeLists.txt @@ -1,3 +1,4 @@ +add_definitions(-g -O0) add_sycl_unittest(SchedulerTests OBJECT BlockedCommands.cpp Commands.cpp diff --git a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp index cf0c91e1478c5..07c450cee354a 100644 --- a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp @@ -9,6 +9,7 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" +#include #include #include @@ -23,19 +24,55 @@ using namespace sycl; size_t GEventsWaitCounter = 0; +auto DummyHostTaskEvent = mock::createDummyHandle(); +bool HostTaskReady = false; +std::mutex HostTaskMutex; +std::condition_variable HostTaskCV; + +size_t EventSignaled = 0; +size_t EventCreated = 0; + inline ur_result_t redefinedEventsWait(void *pParams) { auto params = *static_cast(pParams); if (*params.pnumEvents > 0) { GEventsWaitCounter++; + + if (**params.pphEventWaitList == DummyHostTaskEvent) { + std::unique_lock lk(HostTaskMutex); + HostTaskCV.wait(lk, [] { return HostTaskReady; }); + } + } + + return UR_RESULT_SUCCESS; +} + +inline ur_result_t redefinedEventHostSignal(void *pParams) { + EventSignaled = true; + { + std::unique_lock lk(HostTaskMutex); + HostTaskReady = true; } + HostTaskCV.notify_one(); return UR_RESULT_SUCCESS; } -TEST_F(SchedulerTest, InOrderQueueHostTaskDeps) { +inline ur_result_t redefinedEventCreateWithNativeHandle(void *pParams) { + EventCreated = true; + auto params = + *static_cast(pParams); + **params.pphEvent = DummyHostTaskEvent; + return UR_RESULT_SUCCESS; +} + +template void InOrderQueueHostTaskDepsTestBody() { GEventsWaitCounter = 0; - sycl::unittest::UrMock<> Mock; + sycl::unittest::UrMock Mock; sycl::platform Plt = sycl::platform(); mock::getCallbacks().set_before_callback("urEventWait", &redefinedEventsWait); + mock::getCallbacks().set_replace_callback("urEventHostSignal", + &redefinedEventHostSignal); + mock::getCallbacks().set_replace_callback( + "urEventCreateWithNativeHandle", &redefinedEventCreateWithNativeHandle); context Ctx{Plt}; queue InOrderQueue{Ctx, default_selector_v, property::queue::in_order()}; @@ -45,8 +82,20 @@ TEST_F(SchedulerTest, InOrderQueueHostTaskDeps) { [&](sycl::handler &CGH) { CGH.memset(buf, 0, sizeof(buf[0])); }); InOrderQueue.submit([&](sycl::handler &CGH) { CGH.host_task([=] {}); }) .wait(); - + EXPECT_EQ(EventCreated, Backend == sycl::backend::ext_oneapi_level_zero); EXPECT_EQ(GEventsWaitCounter, 1u); + EXPECT_EQ(EventSignaled, Backend == sycl::backend::ext_oneapi_level_zero); +} + +TEST_F(SchedulerTest, InOrderQueueHostTaskDepsOCL) { + InOrderQueueHostTaskDepsTestBody(); +} + +TEST_F(SchedulerTest, InOrderQueueHostTaskDepsL0) { + std::function DoNothing = [] {}; + unittest::ScopedEnvVar HostTaskViaNativeEvent{"SYCL_ENABLE_USER_EVENTS_PATH", + "1", DoNothing}; + InOrderQueueHostTaskDepsTestBody(); } enum class CommandType { KERNEL = 1, MEMSET = 2 }; diff --git a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp index 1a5fa726170b8..371ad6a08e7c3 100644 --- a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp +++ b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp @@ -13,12 +13,12 @@ using namespace sycl; -class MemObjMock : public sycl::detail::SYCLMemObjI { +class MemObjMock : public sycl::detail::SYCLMemObjT { public: using ContextImplPtr = std::shared_ptr; MemObjMock(const std::shared_ptr &Record) - : SYCLMemObjI() { + : SYCLMemObjT({}, nullptr) { MRecord = Record; } @@ -38,6 +38,7 @@ class MemObjMock : public sycl::detail::SYCLMemObjI { bool hasUserDataPtr() const override { return false; } bool isHostPointerReadOnly() const override { return false; } bool usesPinnedHostMemory() const override { return false; } + bool hasInteropEvent() const { return false; } detail::ContextImplPtr getInteropContext() const override { return nullptr; } }; @@ -62,7 +63,9 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) { // Commands are linked only if the device supports host unified memory. sycl::queue Queue1{Dev}; + sycl::queue Queue2{Dev}; sycl::detail::QueueImplPtr Q1 = sycl::detail::getSyclObjImpl(Queue1); + sycl::detail::QueueImplPtr Q2 = sycl::detail::getSyclObjImpl(Queue2); auto AllocaDep = [](sycl::detail::Command *, sycl::detail::Command *, sycl::detail::MemObjRecord *, @@ -77,8 +80,8 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) { sycl::detail::AllocaCommand AllocaCmd1(nullptr, Req, false); Record->MAllocaCommands.push_back(&AllocaCmd1); - MockCommand DepCmd(nullptr, Req); - MockCommand DepDepCmd(nullptr, Req); + MockCommand DepCmd(Q2, Req); + MockCommand DepDepCmd(Q2, Req); DepCmd.MDeps.push_back({&DepDepCmd, DepDepCmd.getRequirement(), &AllocaCmd1}); DepDepCmd.MUsers.insert(&DepCmd); std::vector ToEnqueue; @@ -87,7 +90,7 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) { MockScheduler MS; sycl::detail::Command *AllocaCmd2 = MS.getOrCreateAllocaForReq(Record.get(), &Req, Q1, ToEnqueue); - + ASSERT_NE(AllocaCmd2, &AllocaCmd1); ASSERT_TRUE(!!AllocaCmd1.MLinkedAllocaCmd) << "No link appeared in existing command"; ASSERT_EQ(AllocaCmd1.MLinkedAllocaCmd, AllocaCmd2) << "Invalid link appeared"; diff --git a/sycl/unittests/scheduler/QueueFlushing.cpp b/sycl/unittests/scheduler/QueueFlushing.cpp index 82cda17f1fa95..f05779f632078 100644 --- a/sycl/unittests/scheduler/QueueFlushing.cpp +++ b/sycl/unittests/scheduler/QueueFlushing.cpp @@ -20,6 +20,13 @@ static bool QueueFlushed = false; static bool EventStatusQueried = false; static ur_event_status_t EventStatus = UR_EVENT_STATUS_QUEUED; +static ur_result_t redefinedMemBufferReadRect(void *pParams) { + auto params = + *static_cast(pParams); + **params.pphEvent = mock::createDummyHandle(); + return UR_RESULT_SUCCESS; +} + static ur_result_t redefinedQueueFlush(void *pParams) { auto params = *static_cast(pParams); EXPECT_EQ(ExpectedDepQueue, *params.phQueue); @@ -87,6 +94,8 @@ TEST_F(SchedulerTest, QueueFlushing) { &redefinedQueueFlush); mock::getCallbacks().set_after_callback("urEventGetInfo", &redefinedEventGetInfoAfter); + mock::getCallbacks().set_after_callback("urEnqueueMemBufferReadRect", + &redefinedMemBufferReadRect); context Ctx{Plt}; queue QueueA{Ctx, default_selector_v}; @@ -124,8 +133,8 @@ TEST_F(SchedulerTest, QueueFlushing) { &HostAllocaCmd, QueueImplA, nullptr}; testCommandEnqueue(&MemCpyCmd, QueueImplB, MockReq); - detail::MemCpyCommandHost MemCpyCmdHost{MockReq, &AllocaCmd, MockReq, - &MockHostPtr, QueueImplA, nullptr}; + detail::MemCpyToHostCommand MemCpyCmdHost{MockReq, &AllocaCmd, MockReq, + &MockHostPtr, QueueImplA}; testCommandEnqueue(&MemCpyCmdHost, QueueImplB, MockReq); std::unique_ptr CG{ diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 7a48cc1523b3f..b068a055350df 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -181,9 +181,8 @@ class MockScheduler : public sycl::detail::Scheduler { sycl::detail::UpdateHostRequirementCommand * insertUpdateHostReqCmd(sycl::detail::MemObjRecord *Record, sycl::detail::Requirement *Req, - const sycl::detail::QueueImplPtr &Queue, std::vector &ToEnqueue) { - return MGraphBuilder.insertUpdateHostReqCmd(Record, Req, Queue, ToEnqueue); + return MGraphBuilder.insertUpdateHostReqCmd(Record, Req, ToEnqueue); } sycl::detail::EmptyCommand * diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index 811ebc65b0e00..bd4674e7ca5fc 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -467,6 +467,8 @@ typedef enum ur_function_t { UR_FUNCTION_BINDLESS_IMAGES_GET_IMAGE_SAMPLED_HANDLE_SUPPORT_EXP = 269, /// Enumerator for ::urBindlessImagesGetImageMemoryHandleTypeSupportExp UR_FUNCTION_BINDLESS_IMAGES_GET_IMAGE_MEMORY_HANDLE_TYPE_SUPPORT_EXP = 270, + /// Enumerator for ::urEventHostSignal + UR_FUNCTION_EVENT_HOST_SIGNAL = 271, /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -7518,6 +7520,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( /// [out] a pointer to the native handle of the event. ur_native_handle_t *phNativeEvent); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Signals an event from host +/// +/// @remarks +/// _Analogues_ +/// - **clSetUserEventStatus** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hEvent` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +UR_APIEXPORT ur_result_t UR_APICALL urEventHostSignal( + /// [in] handle of the event object + ur_event_handle_t hEvent); + /////////////////////////////////////////////////////////////////////////////// /// @brief Properties for for ::urEventCreateWithNativeHandle. typedef struct ur_event_native_properties_t { @@ -13200,6 +13223,14 @@ typedef struct ur_event_get_native_handle_params_t { ur_native_handle_t **pphNativeEvent; } ur_event_get_native_handle_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEventHostSignal +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_event_host_signal_params_t { + ur_event_handle_t *phEvent; +} ur_event_host_signal_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urEventCreateWithNativeHandle /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/unified-runtime/include/ur_api_funcs.def b/unified-runtime/include/ur_api_funcs.def index 2ba60864f5940..deb5d3a9cac19 100644 --- a/unified-runtime/include/ur_api_funcs.def +++ b/unified-runtime/include/ur_api_funcs.def @@ -38,6 +38,7 @@ _UR_API(urEventWait) _UR_API(urEventRetain) _UR_API(urEventRelease) _UR_API(urEventGetNativeHandle) +_UR_API(urEventHostSignal) _UR_API(urEventCreateWithNativeHandle) _UR_API(urEventSetCallback) _UR_API(urProgramCreateWithIL) diff --git a/unified-runtime/include/ur_ddi.h b/unified-runtime/include/ur_ddi.h index 553bb61a7e8c7..f4cd9bb7720a0 100644 --- a/unified-runtime/include/ur_ddi.h +++ b/unified-runtime/include/ur_ddi.h @@ -225,6 +225,10 @@ typedef ur_result_t(UR_APICALL *ur_pfnEventRelease_t)(ur_event_handle_t); typedef ur_result_t(UR_APICALL *ur_pfnEventGetNativeHandle_t)( ur_event_handle_t, ur_native_handle_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEventHostSignal +typedef ur_result_t(UR_APICALL *ur_pfnEventHostSignal_t)(ur_event_handle_t); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urEventCreateWithNativeHandle typedef ur_result_t(UR_APICALL *ur_pfnEventCreateWithNativeHandle_t)( @@ -247,6 +251,7 @@ typedef struct ur_event_dditable_t { ur_pfnEventRetain_t pfnRetain; ur_pfnEventRelease_t pfnRelease; ur_pfnEventGetNativeHandle_t pfnGetNativeHandle; + ur_pfnEventHostSignal_t pfnHostSignal; ur_pfnEventCreateWithNativeHandle_t pfnCreateWithNativeHandle; ur_pfnEventSetCallback_t pfnSetCallback; } ur_event_dditable_t; diff --git a/unified-runtime/include/ur_print.h b/unified-runtime/include/ur_print.h index c2dd79cc6ba23..7f8480344c866 100644 --- a/unified-runtime/include/ur_print.h +++ b/unified-runtime/include/ur_print.h @@ -1735,6 +1735,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEventGetNativeHandleParams( const struct ur_event_get_native_handle_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_event_host_signal_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEventHostSignalParams( + const struct ur_event_host_signal_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_event_create_with_native_handle_params_t struct /// @returns diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp index d395f135ee771..dc1bb2eebdc9f 100644 --- a/unified-runtime/include/ur_print.hpp +++ b/unified-runtime/include/ur_print.hpp @@ -1256,6 +1256,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { os << "UR_FUNCTION_BINDLESS_IMAGES_GET_IMAGE_MEMORY_HANDLE_TYPE_SUPPORT_" "EXP"; break; + case UR_FUNCTION_EVENT_HOST_SIGNAL: + os << "UR_FUNCTION_EVENT_HOST_SIGNAL"; + break; default: os << "unknown enumerator"; break; @@ -13030,6 +13033,21 @@ inline std::ostream &operator<<( return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_event_host_signal_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_event_host_signal_params_t *params) { + + os << ".hEvent = "; + + ur::details::printPtr(os, *(params->phEvent)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_event_create_with_native_handle_params_t /// type @@ -20965,6 +20983,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, case UR_FUNCTION_EVENT_GET_NATIVE_HANDLE: { os << (const struct ur_event_get_native_handle_params_t *)params; } break; + case UR_FUNCTION_EVENT_HOST_SIGNAL: { + os << (const struct ur_event_host_signal_params_t *)params; + } break; case UR_FUNCTION_EVENT_CREATE_WITH_NATIVE_HANDLE: { os << (const struct ur_event_create_with_native_handle_params_t *)params; } break; diff --git a/unified-runtime/scripts/core/event.yml b/unified-runtime/scripts/core/event.yml index 1b0eeca23e9de..d06d1da7537af 100644 --- a/unified-runtime/scripts/core/event.yml +++ b/unified-runtime/scripts/core/event.yml @@ -282,6 +282,22 @@ returns: - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: - "If the adapter has no underlying equivalent handle." --- #-------------------------------------------------------------------------- +type: function +desc: "Signals an event from host" +class: $xEvent +name: HostSignal +ordinal: "0" +analogue: + - "**clSetUserEventStatus**" +params: + - type: $x_event_handle_t + name: hEvent + desc: "[in] handle of the event object" +returns: + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY +--- #-------------------------------------------------------------------------- type: struct desc: "Properties for for $xEventCreateWithNativeHandle." class: $xQueue diff --git a/unified-runtime/scripts/core/registry.yml b/unified-runtime/scripts/core/registry.yml index e9d030ddeb994..c66aa65d69c1e 100644 --- a/unified-runtime/scripts/core/registry.yml +++ b/unified-runtime/scripts/core/registry.yml @@ -658,6 +658,9 @@ etors: - name: BINDLESS_IMAGES_GET_IMAGE_MEMORY_HANDLE_TYPE_SUPPORT_EXP desc: Enumerator for $xBindlessImagesGetImageMemoryHandleTypeSupportExp value: '270' +- name: EVENT_HOST_SIGNAL + desc: Enumerator for $xEventHostSignal + value: '271' --- type: enum desc: Defines structure types diff --git a/unified-runtime/source/adapters/cuda/event.cpp b/unified-runtime/source/adapters/cuda/event.cpp index dd7e961db79a2..b8e731207803f 100644 --- a/unified-runtime/source/adapters/cuda/event.cpp +++ b/unified-runtime/source/adapters/cuda/event.cpp @@ -306,3 +306,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urEventHostSignal(ur_event_handle_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/unified-runtime/source/adapters/hip/event.cpp b/unified-runtime/source/adapters/hip/event.cpp index 5162df971cfe9..9f88695d1b3f5 100644 --- a/unified-runtime/source/adapters/hip/event.cpp +++ b/unified-runtime/source/adapters/hip/event.cpp @@ -248,6 +248,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback(ur_event_handle_t, return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urEventHostSignal(ur_event_handle_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { const auto RefCount = hEvent->incrementReferenceCount(); diff --git a/unified-runtime/source/adapters/level_zero/event.cpp b/unified-runtime/source/adapters/level_zero/event.cpp index c53bfa1089100..646f230219c77 100644 --- a/unified-runtime/source/adapters/level_zero/event.cpp +++ b/unified-runtime/source/adapters/level_zero/event.cpp @@ -882,6 +882,11 @@ urEventRetain(/** [in] handle of the event object */ ur_event_handle_t Event) { return UR_RESULT_SUCCESS; } +ur_result_t urEventHostSignal(ur_event_handle_t Event) { + auto ZeResult = ZE_CALL_NOCHECK(zeEventHostSignal, (Event->ZeEvent)); + return ze2urResult(ZeResult); +} + ur_result_t urEventRelease(/** [in] handle of the event object */ ur_event_handle_t Event) { @@ -945,8 +950,6 @@ ur_result_t urExtEventCreate( false /*ForceDisableProfiling*/, false)); (*Event)->RefCountExternal++; - if (!(*Event)->CounterBasedEventsEnabled) - ZE2UR_CALL(zeEventHostSignal, ((*Event)->ZeEvent)); return UR_RESULT_SUCCESS; } @@ -967,8 +970,6 @@ ur_result_t urEventCreateWithNativeHandle( false /*ForceDisableProfiling*/, false)); (*Event)->RefCountExternal++; - if (!(*Event)->CounterBasedEventsEnabled) - ZE2UR_CALL(zeEventHostSignal, ((*Event)->ZeEvent)); return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/adapters/level_zero/image.cpp b/unified-runtime/source/adapters/level_zero/image.cpp index 56dbf628cf037..9732d6d3f7dcb 100644 --- a/unified-runtime/source/adapters/level_zero/image.cpp +++ b/unified-runtime/source/adapters/level_zero/image.cpp @@ -20,9 +20,7 @@ #include "loader/ze_loader.h" -namespace { - -} // namespace +namespace {} // namespace namespace ur::level_zero { diff --git a/unified-runtime/source/adapters/level_zero/queue.cpp b/unified-runtime/source/adapters/level_zero/queue.cpp index d9c278732fca6..18bc40355af41 100644 --- a/unified-runtime/source/adapters/level_zero/queue.cpp +++ b/unified-runtime/source/adapters/level_zero/queue.cpp @@ -2044,7 +2044,7 @@ ur_result_t ur_queue_handle_t_::resetCommandList( // If events in the queue are discarded then we can't check their status. // Helper for checking of event completion auto EventCompleted = [](ur_event_handle_t Event) -> bool { - std::scoped_lock EventLock(Event->Mutex); + std::shared_lock EventLock(Event->Mutex); ze_result_t ZeResult = Event->Completed ? ZE_RESULT_SUCCESS diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp index 908d20d6d9305..3bb097c7ec616 100644 --- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp @@ -260,6 +260,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( pDdiTable->pfnRetain = ur::level_zero::urEventRetain; pDdiTable->pfnRelease = ur::level_zero::urEventRelease; pDdiTable->pfnGetNativeHandle = ur::level_zero::urEventGetNativeHandle; + pDdiTable->pfnHostSignal = ur::level_zero::urEventHostSignal; pDdiTable->pfnCreateWithNativeHandle = ur::level_zero::urEventCreateWithNativeHandle; pDdiTable->pfnSetCallback = ur::level_zero::urEventSetCallback; diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp index 0213e94dc8c84..196f91ef5ed62 100644 --- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp +++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp @@ -325,6 +325,7 @@ ur_result_t urEventRetain(ur_event_handle_t hEvent); ur_result_t urEventRelease(ur_event_handle_t hEvent); ur_result_t urEventGetNativeHandle(ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent); +ur_result_t urEventHostSignal(ur_event_handle_t hEvent); ur_result_t urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, ur_context_handle_t hContext, diff --git a/unified-runtime/source/adapters/level_zero/v2/context.cpp b/unified-runtime/source/adapters/level_zero/v2/context.cpp index 4cfd6b7c9de54..db7d2cef30880 100644 --- a/unified-runtime/source/adapters/level_zero/v2/context.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/context.cpp @@ -52,16 +52,16 @@ ur_context_handle_t_::ur_context_handle_t_(ze_context_handle_t hContext, hDevices(phDevices, phDevices + numDevices), commandListCache(hContext, phDevices[0]->Platform->ZeCopyOffloadExtensionSupported), - eventPoolCache(this, phDevices[0]->Platform->getNumDevices(), - [context = this](DeviceId /* deviceId*/, - v2::event_flags_t flags) - -> std::unique_ptr { - assert((flags & v2::EVENT_FLAGS_COUNTER) != 0); - - // TODO: just use per-context id? - return std::make_unique( - context, v2::QUEUE_IMMEDIATE, flags); - }), + eventPoolCache( + this, phDevices[0]->Platform->getNumDevices(), + [context = this](DeviceId /* deviceId*/, v2::event_flags_t flags) + -> std::unique_ptr { + assert((flags & v2::EVENT_FLAGS_COUNTER) != 0); + + // TODO: just use per-context id? + return std::make_unique( + context, v2::QUEUE_IMMEDIATE, flags); + }), nativeEventsPool(this, std::make_unique( this, v2::QUEUE_IMMEDIATE, v2::EVENT_FLAGS_PROFILING_ENABLED)), diff --git a/unified-runtime/source/adapters/level_zero/v2/event.cpp b/unified-runtime/source/adapters/level_zero/v2/event.cpp index ad6c959c20a8d..3984a13a7ee7c 100644 --- a/unified-runtime/source/adapters/level_zero/v2/event.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/event.cpp @@ -229,6 +229,13 @@ ur_result_t urEventRelease(ur_event_handle_t hEvent) try { return exceptionToResult(std::current_exception()); } +ur_result_t urEventHostSignal(ur_event_handle_t hEvent) try { + auto ZeResult = ZE_CALL_NOCHECK(zeEventHostSignal, (hEvent->getZeEvent())); + return ze2urResult(ZeResult); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + ur_result_t urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) try { for (uint32_t i = 0; i < numEvents; ++i) { @@ -387,7 +394,6 @@ urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, v2::EVENT_FLAGS_COUNTER) == 0); *phEvent = hContext->getNativeEventsPool().allocate(); - ZE2UR_CALL(zeEventHostSignal, ((*phEvent)->getZeEvent())); } else { *phEvent = new ur_event_handle_t_(hContext, hNativeEvent, pProperties); } diff --git a/unified-runtime/source/adapters/mock/ur_mockddi.cpp b/unified-runtime/source/adapters/mock/ur_mockddi.cpp index 6634cde2000ff..ce985d5cce175 100644 --- a/unified-runtime/source/adapters/mock/ur_mockddi.cpp +++ b/unified-runtime/source/adapters/mock/ur_mockddi.cpp @@ -5471,6 +5471,48 @@ __urdlllocal ur_result_t UR_APICALL urEventGetNativeHandle( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEventHostSignal +__urdlllocal ur_result_t UR_APICALL urEventHostSignal( + /// [in] handle of the event object + ur_event_handle_t hEvent) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_event_host_signal_params_t params = {&hEvent}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urEventHostSignal")); + if (beforeCallback) { + result = beforeCallback(¶ms); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback("urEventHostSignal")); + if (replaceCallback) { + result = replaceCallback(¶ms); + } else { + + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urEventHostSignal")); + if (afterCallback) { + return afterCallback(¶ms); + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEventCreateWithNativeHandle __urdlllocal ur_result_t UR_APICALL urEventCreateWithNativeHandle( @@ -12355,6 +12397,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( pDdiTable->pfnGetNativeHandle = driver::urEventGetNativeHandle; + pDdiTable->pfnHostSignal = driver::urEventHostSignal; + pDdiTable->pfnCreateWithNativeHandle = driver::urEventCreateWithNativeHandle; pDdiTable->pfnSetCallback = driver::urEventSetCallback; diff --git a/unified-runtime/source/adapters/native_cpu/event.cpp b/unified-runtime/source/adapters/native_cpu/event.cpp index 91b8fb302eb18..e93f77ee7473b 100644 --- a/unified-runtime/source/adapters/native_cpu/event.cpp +++ b/unified-runtime/source/adapters/native_cpu/event.cpp @@ -108,6 +108,10 @@ urEnqueueTimestampRecordingExp(ur_queue_handle_t /*hQueue*/, bool /*blocking*/, DIE_NO_IMPLEMENTATION; } +UR_APIEXPORT ur_result_t UR_APICALL urEventHostSignal(ur_event_handle_t) { + DIE_NO_IMPLEMENTATION; +} + ur_event_handle_t_::ur_event_handle_t_(ur_queue_handle_t queue, ur_command_t command_type) : queue(queue), context(queue->getContext()), command_type(command_type), diff --git a/unified-runtime/source/adapters/opencl/event.cpp b/unified-runtime/source/adapters/opencl/event.cpp index dc017ee3947f2..c7b644f029af6 100644 --- a/unified-runtime/source/adapters/opencl/event.cpp +++ b/unified-runtime/source/adapters/opencl/event.cpp @@ -1,4 +1,4 @@ -//===--------- memory.cpp - OpenCL Adapter ---------------------------===// +//===--------- event.cpp - OpenCL Adapter ---------------------------===// // // Copyright (C) 2023 Intel Corporation // @@ -117,11 +117,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( ur_event_handle_t *phEvent) { cl_event NativeHandle = reinterpret_cast(hNativeEvent); try { - auto UREvent = - std::make_unique(NativeHandle, hContext, nullptr); - UREvent->IsNativeHandleOwned = - pProperties ? pProperties->isNativeHandleOwned : false; - *phEvent = UREvent.release(); + if (hNativeEvent) { + auto UREvent = + std::make_unique(NativeHandle, hContext, nullptr); + UREvent->IsNativeHandleOwned = + pProperties ? pProperties->isNativeHandleOwned : false; + *phEvent = UREvent.release(); + } else { + cl_int RetErr{}; + cl_event Event = clCreateUserEvent(hContext->CLContext, &RetErr); + CL_RETURN_ON_FAILURE(RetErr); + auto UREvent = + std::make_unique(Event, hContext, nullptr); + UREvent->IsNativeHandleOwned = true; + *phEvent = UREvent.release(); + } } catch (std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; } catch (...) { @@ -293,3 +303,8 @@ urEnqueueTimestampRecordingExp(ur_queue_handle_t, bool, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL urEventHostSignal(ur_event_handle_t Event) { + CL_RETURN_ON_FAILURE(clSetUserEventStatus(Event->CLEvent, CL_COMPLETE)); + return UR_RESULT_SUCCESS; +} diff --git a/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp b/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp index f4cf124ac7f7d..0627809483b2f 100644 --- a/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp @@ -77,6 +77,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( pDdiTable->pfnRetain = urEventRetain; pDdiTable->pfnSetCallback = urEventSetCallback; pDdiTable->pfnWait = urEventWait; + pDdiTable->pfnHostSignal = urEventHostSignal; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp index 77e41f75a49e6..a84f779afe439 100644 --- a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp +++ b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp @@ -4460,6 +4460,39 @@ __urdlllocal ur_result_t UR_APICALL urEventGetNativeHandle( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEventHostSignal +__urdlllocal ur_result_t UR_APICALL urEventHostSignal( + /// [in] handle of the event object + ur_event_handle_t hEvent) { + auto pfnHostSignal = getContext()->urDdiTable.Event.pfnHostSignal; + + if (nullptr == pfnHostSignal) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + ur_event_host_signal_params_t params = {&hEvent}; + uint64_t instance = getContext()->notify_begin(UR_FUNCTION_EVENT_HOST_SIGNAL, + "urEventHostSignal", ¶ms); + + auto &logger = getContext()->logger; + logger.info(" ---> urEventHostSignal\n"); + + ur_result_t result = pfnHostSignal(hEvent); + + getContext()->notify_end(UR_FUNCTION_EVENT_HOST_SIGNAL, "urEventHostSignal", + ¶ms, &result, instance); + + if (logger.getLevel() <= UR_LOGGER_LEVEL_INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams(args_str, UR_FUNCTION_EVENT_HOST_SIGNAL, + ¶ms); + logger.info(" <--- urEventHostSignal({}) -> {};\n", args_str.str(), + result); + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEventCreateWithNativeHandle __urdlllocal ur_result_t UR_APICALL urEventCreateWithNativeHandle( @@ -10575,6 +10608,9 @@ __urdlllocal ur_result_t UR_APICALL urGetEventProcAddrTable( dditable.pfnGetNativeHandle = pDdiTable->pfnGetNativeHandle; pDdiTable->pfnGetNativeHandle = ur_tracing_layer::urEventGetNativeHandle; + dditable.pfnHostSignal = pDdiTable->pfnHostSignal; + pDdiTable->pfnHostSignal = ur_tracing_layer::urEventHostSignal; + dditable.pfnCreateWithNativeHandle = pDdiTable->pfnCreateWithNativeHandle; pDdiTable->pfnCreateWithNativeHandle = ur_tracing_layer::urEventCreateWithNativeHandle; diff --git a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp index 4b5662fd46f25..22a8aa18c79c2 100644 --- a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp +++ b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp @@ -4384,6 +4384,32 @@ __urdlllocal ur_result_t UR_APICALL urEventGetNativeHandle( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEventHostSignal +__urdlllocal ur_result_t UR_APICALL urEventHostSignal( + /// [in] handle of the event object + ur_event_handle_t hEvent) { + auto pfnHostSignal = getContext()->urDdiTable.Event.pfnHostSignal; + + if (nullptr == pfnHostSignal) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hEvent) + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hEvent)) { + getContext()->refCountContext->logInvalidReference(hEvent); + } + + ur_result_t result = pfnHostSignal(hEvent); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEventCreateWithNativeHandle __urdlllocal ur_result_t UR_APICALL urEventCreateWithNativeHandle( @@ -11487,6 +11513,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( dditable.pfnGetNativeHandle = pDdiTable->pfnGetNativeHandle; pDdiTable->pfnGetNativeHandle = ur_validation_layer::urEventGetNativeHandle; + dditable.pfnHostSignal = pDdiTable->pfnHostSignal; + pDdiTable->pfnHostSignal = ur_validation_layer::urEventHostSignal; + dditable.pfnCreateWithNativeHandle = pDdiTable->pfnCreateWithNativeHandle; pDdiTable->pfnCreateWithNativeHandle = ur_validation_layer::urEventCreateWithNativeHandle; diff --git a/unified-runtime/source/loader/loader.def.in b/unified-runtime/source/loader/loader.def.in index 52b9fc18f863e..3a63cf937f668 100644 --- a/unified-runtime/source/loader/loader.def.in +++ b/unified-runtime/source/loader/loader.def.in @@ -106,6 +106,7 @@ EXPORTS urEventGetInfo urEventGetNativeHandle urEventGetProfilingInfo + urEventHostSignal urEventRelease urEventRetain urEventSetCallback @@ -316,6 +317,7 @@ EXPORTS urPrintEventGetInfoParams urPrintEventGetNativeHandleParams urPrintEventGetProfilingInfoParams + urPrintEventHostSignalParams urPrintEventInfo urPrintEventNativeProperties urPrintEventReleaseParams diff --git a/unified-runtime/source/loader/loader.map.in b/unified-runtime/source/loader/loader.map.in index d1a7c8d190315..a41dc9825abc3 100644 --- a/unified-runtime/source/loader/loader.map.in +++ b/unified-runtime/source/loader/loader.map.in @@ -106,6 +106,7 @@ urEventGetInfo; urEventGetNativeHandle; urEventGetProfilingInfo; + urEventHostSignal; urEventRelease; urEventRetain; urEventSetCallback; @@ -316,6 +317,7 @@ urPrintEventGetInfoParams; urPrintEventGetNativeHandleParams; urPrintEventGetProfilingInfoParams; + urPrintEventHostSignalParams; urPrintEventInfo; urPrintEventNativeProperties; urPrintEventReleaseParams; diff --git a/unified-runtime/source/loader/ur_ldrddi.cpp b/unified-runtime/source/loader/ur_ldrddi.cpp index 8ba14d7b8ad1c..48f1f9c4daf3e 100644 --- a/unified-runtime/source/loader/ur_ldrddi.cpp +++ b/unified-runtime/source/loader/ur_ldrddi.cpp @@ -4347,6 +4347,30 @@ __urdlllocal ur_result_t UR_APICALL urEventGetNativeHandle( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEventHostSignal +__urdlllocal ur_result_t UR_APICALL urEventHostSignal( + /// [in] handle of the event object + ur_event_handle_t hEvent) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hEvent)->dditable; + auto pfnHostSignal = dditable->ur.Event.pfnHostSignal; + if (nullptr == pfnHostSignal) + return UR_RESULT_ERROR_UNINITIALIZED; + + // convert loader handle to platform handle + hEvent = reinterpret_cast(hEvent)->handle; + + // forward to device-platform + result = pfnHostSignal(hEvent); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEventCreateWithNativeHandle __urdlllocal ur_result_t UR_APICALL urEventCreateWithNativeHandle( @@ -10660,6 +10684,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( pDdiTable->pfnRetain = ur_loader::urEventRetain; pDdiTable->pfnRelease = ur_loader::urEventRelease; pDdiTable->pfnGetNativeHandle = ur_loader::urEventGetNativeHandle; + pDdiTable->pfnHostSignal = ur_loader::urEventHostSignal; pDdiTable->pfnCreateWithNativeHandle = ur_loader::urEventCreateWithNativeHandle; pDdiTable->pfnSetCallback = ur_loader::urEventSetCallback; diff --git a/unified-runtime/source/loader/ur_libapi.cpp b/unified-runtime/source/loader/ur_libapi.cpp index 4000e7f483dc0..df289d4eeebf1 100644 --- a/unified-runtime/source/loader/ur_libapi.cpp +++ b/unified-runtime/source/loader/ur_libapi.cpp @@ -4964,6 +4964,35 @@ ur_result_t UR_APICALL urEventGetNativeHandle( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Signals an event from host +/// +/// @remarks +/// _Analogues_ +/// - **clSetUserEventStatus** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hEvent` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urEventHostSignal( + /// [in] handle of the event object + ur_event_handle_t hEvent) try { + auto pfnHostSignal = ur_lib::getContext()->urDdiTable.Event.pfnHostSignal; + if (nullptr == pfnHostSignal) + return UR_RESULT_ERROR_UNINITIALIZED; + + return pfnHostSignal(hEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Create runtime event object from native event handle. /// diff --git a/unified-runtime/source/loader/ur_print.cpp b/unified-runtime/source/loader/ur_print.cpp index eabc4327735a5..39baa9f8c340e 100644 --- a/unified-runtime/source/loader/ur_print.cpp +++ b/unified-runtime/source/loader/ur_print.cpp @@ -1981,6 +1981,15 @@ ur_result_t urPrintEventGetNativeHandleParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t +urPrintEventHostSignalParams(const struct ur_event_host_signal_params_t *params, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintEventCreateWithNativeHandleParams( const struct ur_event_create_with_native_handle_params_t *params, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/unified-runtime/source/ur_api.cpp b/unified-runtime/source/ur_api.cpp index 37c81a3b978c7..cc3d9ab24bbe9 100644 --- a/unified-runtime/source/ur_api.cpp +++ b/unified-runtime/source/ur_api.cpp @@ -4330,6 +4330,30 @@ ur_result_t UR_APICALL urEventGetNativeHandle( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Signals an event from host +/// +/// @remarks +/// _Analogues_ +/// - **clSetUserEventStatus** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hEvent` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urEventHostSignal( + /// [in] handle of the event object + ur_event_handle_t hEvent) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Create runtime event object from native event handle. /// diff --git a/unified-runtime/test/adapters/level_zero/event_cache_tests.cpp b/unified-runtime/test/adapters/level_zero/event_cache_tests.cpp index 7cdd16e341a88..010e851d335cc 100644 --- a/unified-runtime/test/adapters/level_zero/event_cache_tests.cpp +++ b/unified-runtime/test/adapters/level_zero/event_cache_tests.cpp @@ -183,18 +183,14 @@ printFlags(const testing::TestParamInfo &info) { return platform_device_name + "__" + str; } -UUR_DEVICE_TEST_SUITE_WITH_PARAM(urEventCacheTest, - ::testing::Combine( - testing::Values( - 0, UR_QUEUE_FLAG_DISCARD_EVENTS), - testing::Values( - 0, - UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE), - // TODO: why the test fails with - // UR_QUEUE_FLAG_SUBMISSION_BATCHED? - testing:: - Values( - UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE /*, UR_QUEUE_FLAG_SUBMISSION_BATCHED */), - testing::Values( - 0, UR_QUEUE_FLAG_PROFILING_ENABLE)), - printFlags); +UUR_DEVICE_TEST_SUITE_WITH_PARAM( + urEventCacheTest, + ::testing::Combine( + testing::Values(0, UR_QUEUE_FLAG_DISCARD_EVENTS), + testing::Values(0, UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE), + // TODO: why the test fails with + // UR_QUEUE_FLAG_SUBMISSION_BATCHED? + testing::Values( + UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE /*, UR_QUEUE_FLAG_SUBMISSION_BATCHED */), + testing::Values(0, UR_QUEUE_FLAG_PROFILING_ENABLE)), + printFlags);