@@ -1115,6 +1115,18 @@ struct AMDGPUStreamTy {
11151115 return Plugin::success ();
11161116 }
11171117
1118+ // / Complete pending post actions until and including the event in target
1119+ // / slot.
1120+ Error completeUntil (uint32_t TargetSlot) {
1121+ for (uint32_t Slot = 0 ; Slot <= TargetSlot; ++Slot) {
1122+ // Take the post action of the operation if any.
1123+ if (auto Err = Slots[Slot].performAction ())
1124+ return Err;
1125+ }
1126+
1127+ return Plugin::success ();
1128+ }
1129+
11181130 // / Make the current stream wait on a specific operation of another stream.
11191131 // / The idea is to make the current stream waiting on two signals: 1) the last
11201132 // / signal of the current stream, and 2) the last signal of the other stream.
@@ -1502,6 +1514,11 @@ struct AMDGPUStreamTy {
15021514 return complete ();
15031515 }
15041516
1517+ // / Synchronize the stream until the given event. The current thread waits
1518+ // / until the provided event is finalized, and it performs the pending post
1519+ // / actions for that and prior events.
1520+ Error synchronizeOn (AMDGPUEventTy &Event);
1521+
15051522 // / Query the stream and complete pending post actions if operations finished.
15061523 // / Return whether all the operations completed. This operation does not block
15071524 // / the calling thread.
@@ -1575,6 +1592,21 @@ struct AMDGPUEventTy {
15751592 return Stream.waitEvent (*this );
15761593 }
15771594
1595+ Error sync () {
1596+ std::lock_guard<std::mutex> Lock (Mutex);
1597+
1598+ if (!RecordedStream)
1599+ return Plugin::error (ErrorCode::INVALID_ARGUMENT,
1600+ " event does not have any recorded stream" );
1601+
1602+ // No need to wait on anything, the recorded stream already finished the
1603+ // corresponding operation.
1604+ if (RecordedSlot < 0 )
1605+ return Plugin::success ();
1606+
1607+ return RecordedStream->synchronizeOn (*this );
1608+ }
1609+
15781610protected:
15791611 // / The stream registered in this event.
15801612 AMDGPUStreamTy *RecordedStream;
@@ -1630,6 +1662,22 @@ Error AMDGPUStreamTy::waitEvent(const AMDGPUEventTy &Event) {
16301662 return waitOnStreamOperation (RecordedStream, Event.RecordedSlot );
16311663}
16321664
1665+ Error AMDGPUStreamTy::synchronizeOn (AMDGPUEventTy &Event) {
1666+ std::lock_guard<std::mutex> Lock (Mutex);
1667+
1668+ // Wait until the requested slot has completed
1669+ if (auto Err = Slots[Event.RecordedSlot ].Signal ->wait (
1670+ StreamBusyWaitMicroseconds, &Device))
1671+ return Err;
1672+
1673+ // If the event is the last one in the stream, just do a full finalize
1674+ if (Event.RecordedSlot == last ())
1675+ return complete ();
1676+
1677+ // Otherwise, only finalize until the appropriate event
1678+ return completeUntil (Event.RecordedSlot );
1679+ }
1680+
16331681struct AMDGPUStreamManagerTy final
16341682 : GenericDeviceResourceManagerTy<AMDGPUResourceRef<AMDGPUStreamTy>> {
16351683 using ResourceRef = AMDGPUResourceRef<AMDGPUStreamTy>;
@@ -2540,8 +2588,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
25402588
25412589 // / Synchronize the current thread with the event.
25422590 Error syncEventImpl (void *EventPtr) override {
2543- return Plugin::error (ErrorCode::UNIMPLEMENTED,
2544- " synchronize event not implemented " );
2591+ AMDGPUEventTy *Event = reinterpret_cast <AMDGPUEventTy *>(EventPtr);
2592+ return Event-> sync ( );
25452593 }
25462594
25472595 // / Print information about the device.
0 commit comments