Skip to content

Commit df9a864

Browse files
authored
[Offload] Implement event sync in amdgpu (#149300)
1 parent 534b9cd commit df9a864

File tree

3 files changed

+50
-8
lines changed

3 files changed

+50
-8
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,6 +1115,18 @@ struct AMDGPUStreamTy {
11151115
return Plugin::success();
11161116
}
11171117

1118+
/// Complete pending post actions until and including the event in target
1119+
/// slot.
1120+
Error completeUntil(uint32_t TargetSlot) {
1121+
for (uint32_t Slot = 0; Slot <= TargetSlot; ++Slot) {
1122+
// Take the post action of the operation if any.
1123+
if (auto Err = Slots[Slot].performAction())
1124+
return Err;
1125+
}
1126+
1127+
return Plugin::success();
1128+
}
1129+
11181130
/// Make the current stream wait on a specific operation of another stream.
11191131
/// The idea is to make the current stream waiting on two signals: 1) the last
11201132
/// signal of the current stream, and 2) the last signal of the other stream.
@@ -1502,6 +1514,11 @@ struct AMDGPUStreamTy {
15021514
return complete();
15031515
}
15041516

1517+
/// Synchronize the stream until the given event. The current thread waits
1518+
/// until the provided event is finalized, and it performs the pending post
1519+
/// actions for that and prior events.
/// Only declared here; the body is provided out of line as
/// AMDGPUStreamTy::synchronizeOn.
1520+
Error synchronizeOn(AMDGPUEventTy &Event);
1521+
15051522
/// Query the stream and complete pending post actions if operations finished.
15061523
/// Return whether all the operations completed. This operation does not block
15071524
/// the calling thread.
@@ -1575,6 +1592,21 @@ struct AMDGPUEventTy {
15751592
return Stream.waitEvent(*this);
15761593
}
15771594

1595+
Error sync() {
1596+
std::lock_guard<std::mutex> Lock(Mutex);
1597+
1598+
if (!RecordedStream)
1599+
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
1600+
"event does not have any recorded stream");
1601+
1602+
// No need to wait on anything, the recorded stream already finished the
1603+
// corresponding operation.
1604+
if (RecordedSlot < 0)
1605+
return Plugin::success();
1606+
1607+
return RecordedStream->synchronizeOn(*this);
1608+
}
1609+
15781610
protected:
15791611
/// The stream registered in this event.
15801612
AMDGPUStreamTy *RecordedStream;
@@ -1630,6 +1662,22 @@ Error AMDGPUStreamTy::waitEvent(const AMDGPUEventTy &Event) {
16301662
return waitOnStreamOperation(RecordedStream, Event.RecordedSlot);
16311663
}
16321664

1665+
Error AMDGPUStreamTy::synchronizeOn(AMDGPUEventTy &Event) {
1666+
std::lock_guard<std::mutex> Lock(Mutex);
1667+
1668+
// Wait until the requested slot has completed
1669+
if (auto Err = Slots[Event.RecordedSlot].Signal->wait(
1670+
StreamBusyWaitMicroseconds, &Device))
1671+
return Err;
1672+
1673+
// If the event is the last one in the stream, just do a full finalize
1674+
if (Event.RecordedSlot == last())
1675+
return complete();
1676+
1677+
// Otherwise, only finalize until the appropriate event
1678+
return completeUntil(Event.RecordedSlot);
1679+
}
1680+
16331681
struct AMDGPUStreamManagerTy final
16341682
: GenericDeviceResourceManagerTy<AMDGPUResourceRef<AMDGPUStreamTy>> {
16351683
using ResourceRef = AMDGPUResourceRef<AMDGPUStreamTy>;
@@ -2540,8 +2588,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
25402588

25412589
/// Synchronize the current thread with the event.
/// In this diff the rows marked '-' are the removed stub, which returned an
/// UNIMPLEMENTED error; the rows marked '+' are its replacement, which
/// forwards to AMDGPUEventTy::sync() and returns that result unchanged.
25422590
Error syncEventImpl(void *EventPtr) override {
2543-
return Plugin::error(ErrorCode::UNIMPLEMENTED,
2544-
"synchronize event not implemented");
2591+
// NOTE(review): EventPtr is cast and dereferenced without a null check;
// presumably the caller always passes a valid AMDGPUEventTy. Confirm.
AMDGPUEventTy *Event = reinterpret_cast<AMDGPUEventTy *>(EventPtr);
2592+
return Event->sync();
25452593
}
25462594

25472595
/// Print information about the device.

offload/unittests/OffloadAPI/common/Fixtures.hpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,9 +171,6 @@ struct OffloadQueueTest : OffloadDeviceTest {
171171
struct OffloadEventTest : OffloadQueueTest {
172172
void SetUp() override {
173173
RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
174-
if (getPlatformBackend() == OL_PLATFORM_BACKEND_AMDGPU)
175-
GTEST_SKIP() << "AMDGPU synchronize event not implemented";
176-
177174
// Get an event from a memcpy. We can still use it in olGetEventInfo etc
178175
// after it has been waited on.
179176
void *Alloc;

offload/unittests/OffloadAPI/event/olWaitEvent.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@ using olWaitEventTest = OffloadQueueTest;
1414
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olWaitEventTest);
1515

1616
TEST_P(olWaitEventTest, Success) {
17-
if (getPlatformBackend() == OL_PLATFORM_BACKEND_AMDGPU)
18-
GTEST_SKIP() << "AMDGPU synchronize event not implemented";
19-
2017
uint32_t Src = 42;
2118
void *DstPtr;
2219

0 commit comments

Comments
 (0)