diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 12c7cc62905c9..b2fd950c9d500 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -1115,6 +1115,18 @@ struct AMDGPUStreamTy { return Plugin::success(); } + /// Complete pending post actions until and including the event in target + /// slot. + Error completeUntil(uint32_t TargetSlot) { + for (uint32_t Slot = 0; Slot <= TargetSlot; ++Slot) { + // Take the post action of the operation if any. + if (auto Err = Slots[Slot].performAction()) + return Err; + } + + return Plugin::success(); + } + /// Make the current stream wait on a specific operation of another stream. /// The idea is to make the current stream waiting on two signals: 1) the last /// signal of the current stream, and 2) the last signal of the other stream. @@ -1502,6 +1514,11 @@ struct AMDGPUStreamTy { return complete(); } + /// Synchronize the stream until the given event. The current thread waits + /// until the provided event is finalized, and it performs the pending post + /// actions for that and prior events. + Error synchronizeOn(AMDGPUEventTy &Event); + /// Query the stream and complete pending post actions if operations finished. /// Return whether all the operations completed. This operation does not block /// the calling thread. @@ -1575,6 +1592,21 @@ struct AMDGPUEventTy { return Stream.waitEvent(*this); } + Error sync() { + std::lock_guard Lock(Mutex); + + if (!RecordedStream) + return Plugin::error(ErrorCode::INVALID_ARGUMENT, + "event does not have any recorded stream"); + + // No need to wait on anything, the recorded stream already finished the + // corresponding operation. + if (RecordedSlot < 0) + return Plugin::success(); + + return RecordedStream->synchronizeOn(*this); + } + protected: /// The stream registered in this event. AMDGPUStreamTy *RecordedStream; @@ -1630,6 +1662,22 @@ Error AMDGPUStreamTy::waitEvent(const AMDGPUEventTy &Event) { return waitOnStreamOperation(RecordedStream, Event.RecordedSlot); } +Error AMDGPUStreamTy::synchronizeOn(AMDGPUEventTy &Event) { + std::lock_guard Lock(Mutex); + + // Wait until the requested slot has completed + if (auto Err = Slots[Event.RecordedSlot].Signal->wait( + StreamBusyWaitMicroseconds, &Device)) + return Err; + + // If the event is the last one in the stream, just do a full finalize + if (Event.RecordedSlot == last()) + return complete(); + + // Otherwise, only finalize until the appropriate event + return completeUntil(Event.RecordedSlot); +} + struct AMDGPUStreamManagerTy final : GenericDeviceResourceManagerTy> { using ResourceRef = AMDGPUResourceRef; @@ -2540,8 +2588,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { /// Synchronize the current thread with the event. Error syncEventImpl(void *EventPtr) override { - return Plugin::error(ErrorCode::UNIMPLEMENTED, - "synchronize event not implemented"); + AMDGPUEventTy *Event = reinterpret_cast(EventPtr); + return Event->sync(); } /// Print information about the device. diff --git a/offload/unittests/OffloadAPI/common/Fixtures.hpp b/offload/unittests/OffloadAPI/common/Fixtures.hpp index e5d815ecda965..546921164f691 100644 --- a/offload/unittests/OffloadAPI/common/Fixtures.hpp +++ b/offload/unittests/OffloadAPI/common/Fixtures.hpp @@ -171,9 +171,6 @@ struct OffloadQueueTest : OffloadDeviceTest { struct OffloadEventTest : OffloadQueueTest { void SetUp() override { RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp()); - if (getPlatformBackend() == OL_PLATFORM_BACKEND_AMDGPU) - GTEST_SKIP() << "AMDGPU synchronize event not implemented"; - // Get an event from a memcpy. We can still use it in olGetEventInfo etc // after it has been waited on. void *Alloc; diff --git a/offload/unittests/OffloadAPI/event/olWaitEvent.cpp b/offload/unittests/OffloadAPI/event/olWaitEvent.cpp index 05356d4ef8d75..f80dabb4fc93f 100644 --- a/offload/unittests/OffloadAPI/event/olWaitEvent.cpp +++ b/offload/unittests/OffloadAPI/event/olWaitEvent.cpp @@ -14,9 +14,6 @@ using olWaitEventTest = OffloadQueueTest; OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olWaitEventTest); TEST_P(olWaitEventTest, Success) { - if (getPlatformBackend() == OL_PLATFORM_BACKEND_AMDGPU) - GTEST_SKIP() << "AMDGPU synchronize event not implemented"; - uint32_t Src = 42; void *DstPtr;