diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index 6486b2b6d13a6..272a12ab59a06 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -72,6 +72,8 @@ struct ol_queue_impl_t { struct ol_event_impl_t { ol_event_impl_t(void *EventInfo, ol_queue_handle_t Queue) : EventInfo(EventInfo), Queue(Queue) {} + // EventInfo may be null, in which case the event should be considered always + // complete void *EventInfo; ol_queue_handle_t Queue; }; @@ -509,8 +511,8 @@ Error olWaitEvents_impl(ol_queue_handle_t Queue, ol_event_handle_t *Events, return Plugin::error(ErrorCode::INVALID_NULL_HANDLE, "olWaitEvents asked to wait on a NULL event"); - // Do nothing if the event is for this queue - if (Event->Queue == Queue) + // Do nothing if the event is for this queue or the event is always complete + if (Event->Queue == Queue || !Event->EventInfo) continue; if (auto Err = Device->waitEvent(Event->EventInfo, Queue->AsyncInfo)) @@ -548,6 +550,10 @@ Error olGetQueueInfoSize_impl(ol_queue_handle_t Queue, ol_queue_info_t PropName, } Error olSyncEvent_impl(ol_event_handle_t Event) { + if (!Event->EventInfo) + // Event always complete + return Plugin::success(); + if (auto Res = Event->Queue->Device->Device->syncEvent(Event->EventInfo)) return Res; @@ -555,8 +561,9 @@ Error olSyncEvent_impl(ol_event_handle_t Event) { } Error olDestroyEvent_impl(ol_event_handle_t Event) { - if (auto Res = Event->Queue->Device->Device->destroyEvent(Event->EventInfo)) - return Res; + if (Event->EventInfo) + if (auto Res = Event->Queue->Device->Device->destroyEvent(Event->EventInfo)) + return Res; return olDestroy(Event); } @@ -590,7 +597,16 @@ Error olGetEventInfoSize_impl(ol_event_handle_t Event, ol_event_info_t PropName, } Error olCreateEvent_impl(ol_queue_handle_t Queue, ol_event_handle_t *EventOut) { + auto Pending = Queue->Device->Device->hasPendingWork(Queue->AsyncInfo); + if (auto Err = Pending.takeError()) + return Err; + *EventOut = new ol_event_impl_t(nullptr, Queue); + if (!*Pending) + // Queue is empty, don't record an event and consider the event always + // complete + return Plugin::success(); + if (auto Res = Queue->Device->Device->createEvent(&(*EventOut)->EventInfo)) return Res; diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index b7bfa89fc9ea6..833c56df21ac3 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -2591,6 +2591,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return Event->wait(*Stream); } + Expected hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override { + auto Stream = AsyncInfo.getQueueAs(); + if (!Stream) + return false; + + auto Query = Stream->query(); + if (Query) + return !*Query; + return Query.takeError(); + } + /// Synchronize the current thread with the event. Error syncEventImpl(void *EventPtr) override { AMDGPUEventTy *Event = reinterpret_cast(EventPtr); diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 8c17a2ee07047..99e7a13ff2b37 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -939,6 +939,14 @@ struct GenericDeviceTy : public DeviceAllocatorTy { Error printInfo(); virtual Expected obtainInfoImpl() = 0; + /// Return true if the device has work that is either queued or currently + /// running + /// + /// Devices which cannot report this information should always return true + Expected hasPendingWork(__tgt_async_info *AsyncInfo); + virtual Expected + hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) = 0; + /// Getters of the grid values. uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; } uint32_t getThreadLimit() const { return GridValues.GV_Max_WG_Size; } diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 94a050b559efe..cce2405cd51c4 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -1623,6 +1623,21 @@ Error GenericDeviceTy::waitEvent(void *EventPtr, __tgt_async_info *AsyncInfo) { return Err; } +Expected GenericDeviceTy::hasPendingWork(__tgt_async_info *AsyncInfo) { + AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo); + auto Res = hasPendingWorkImpl(AsyncInfoWrapper); + if (auto Err = Res.takeError()) { + AsyncInfoWrapper.finalize(Err); + return Err; + } + + auto Err = Plugin::success(); + AsyncInfoWrapper.finalize(Err); + if (Err) + return Err; + return Res; +} + Error GenericDeviceTy::syncEvent(void *EventPtr) { return syncEventImpl(EventPtr); } diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index c5f31670079ae..7649fd9285bb5 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -916,6 +916,11 @@ struct CUDADeviceTy : public GenericDeviceTy { return Plugin::check(Res, "error in cuStreamWaitEvent: %s"); } + // TODO: This should be implementable on CUDA + Expected hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override { + return true; + } + /// Synchronize the current thread with the event. Error syncEventImpl(void *EventPtr) override { CUevent Event = reinterpret_cast(EventPtr); diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp index d950572265b4c..9abc3507f6e68 100644 --- a/offload/plugins-nextgen/host/src/rtl.cpp +++ b/offload/plugins-nextgen/host/src/rtl.cpp @@ -333,6 +333,9 @@ struct GenELF64DeviceTy : public GenericDeviceTy { AsyncInfoWrapperTy &AsyncInfoWrapper) override { return Plugin::success(); } + Expected hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override { + return true; + } Error syncEventImpl(void *EventPtr) override { return Plugin::success(); } /// Print information about the device.