Skip to content

[Offload] Don't create events for empty queues #152304

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions offload/liboffload/src/OffloadImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ struct ol_queue_impl_t {
struct ol_event_impl_t {
ol_event_impl_t(void *EventInfo, ol_queue_handle_t Queue)
: EventInfo(EventInfo), Queue(Queue) {}
// EventInfo may be null, in which case the event should be considered always
// complete
void *EventInfo;
ol_queue_handle_t Queue;
};
Expand Down Expand Up @@ -509,8 +511,8 @@ Error olWaitEvents_impl(ol_queue_handle_t Queue, ol_event_handle_t *Events,
return Plugin::error(ErrorCode::INVALID_NULL_HANDLE,
"olWaitEvents asked to wait on a NULL event");

// Do nothing if the event is for this queue
if (Event->Queue == Queue)
// Do nothing if the event is for this queue or the event is always complete
if (Event->Queue == Queue || !Event->EventInfo)
continue;

if (auto Err = Device->waitEvent(Event->EventInfo, Queue->AsyncInfo))
Expand Down Expand Up @@ -548,15 +550,20 @@ Error olGetQueueInfoSize_impl(ol_queue_handle_t Queue, ol_queue_info_t PropName,
}

Error olSyncEvent_impl(ol_event_handle_t Event) {
if (!Event->EventInfo)
// Event always complete
return Plugin::success();

if (auto Res = Event->Queue->Device->Device->syncEvent(Event->EventInfo))
return Res;

return Error::success();
}

Error olDestroyEvent_impl(ol_event_handle_t Event) {
if (auto Res = Event->Queue->Device->Device->destroyEvent(Event->EventInfo))
return Res;
if (Event->EventInfo)
if (auto Res = Event->Queue->Device->Device->destroyEvent(Event->EventInfo))
return Res;

return olDestroy(Event);
}
Expand Down Expand Up @@ -590,7 +597,16 @@ Error olGetEventInfoSize_impl(ol_event_handle_t Event, ol_event_info_t PropName,
}

Error olCreateEvent_impl(ol_queue_handle_t Queue, ol_event_handle_t *EventOut) {
auto Pending = Queue->Device->Device->hasPendingWork(Queue->AsyncInfo);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated, would be nice to have something nicer than Device->Device->.

if (auto Err = Pending.takeError())
return Err;

*EventOut = new ol_event_impl_t(nullptr, Queue);
if (!*Pending)
// Queue is empty, don't record an event and consider the event always
// complete
return Plugin::success();

if (auto Res = Queue->Device->Device->createEvent(&(*EventOut)->EventInfo))
return Res;

Expand Down
11 changes: 11 additions & 0 deletions offload/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2591,6 +2591,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Event->wait(*Stream);
}

Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
auto Stream = AsyncInfo.getQueueAs<AMDGPUStreamTy *>();
if (!Stream)
return false;

auto Query = Stream->query();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has the side effect of performing cleanup required for the device (freeing any temporary memory, releasing signals, clearing the array, etc.) if the queue happens to be empty. I think this is fine and a sensible thing to do. It should have no observable side effects outside of the RTL.

if (Query)
return !*Query;
return Query.takeError();
}

/// Synchronize the current thread with the event.
Error syncEventImpl(void *EventPtr) override {
AMDGPUEventTy *Event = reinterpret_cast<AMDGPUEventTy *>(EventPtr);
Expand Down
8 changes: 8 additions & 0 deletions offload/plugins-nextgen/common/include/PluginInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,14 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
Error printInfo();
virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;

/// Return true if the device has work that is either queued or currently
/// running
///
/// Devices which cannot report this information should always return true
Expected<bool> hasPendingWork(__tgt_async_info *AsyncInfo);
virtual Expected<bool>
hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) = 0;

/// Getters of the grid values.
uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; }
uint32_t getThreadLimit() const { return GridValues.GV_Max_WG_Size; }
Expand Down
15 changes: 15 additions & 0 deletions offload/plugins-nextgen/common/src/PluginInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1623,6 +1623,21 @@ Error GenericDeviceTy::waitEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
return Err;
}

Expected<bool> GenericDeviceTy::hasPendingWork(__tgt_async_info *AsyncInfo) {
AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
auto Res = hasPendingWorkImpl(AsyncInfoWrapper);
if (auto Err = Res.takeError()) {
AsyncInfoWrapper.finalize(Err);
return Err;
}

auto Err = Plugin::success();
AsyncInfoWrapper.finalize(Err);
if (Err)
return Err;
return Res;
}

Error GenericDeviceTy::syncEvent(void *EventPtr) {
return syncEventImpl(EventPtr);
}
Expand Down
5 changes: 5 additions & 0 deletions offload/plugins-nextgen/cuda/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,11 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::check(Res, "error in cuStreamWaitEvent: %s");
}

// TODO: This should be implementable on CUDA
Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
return true;
}

/// Synchronize the current thread with the event.
Error syncEventImpl(void *EventPtr) override {
CUevent Event = reinterpret_cast<CUevent>(EventPtr);
Expand Down
3 changes: 3 additions & 0 deletions offload/plugins-nextgen/host/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,9 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
return Plugin::success();
}
Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
return true;
}
Error syncEventImpl(void *EventPtr) override { return Plugin::success(); }

/// Print information about the device.
Expand Down
Loading