@@ -1115,6 +1115,18 @@ struct AMDGPUStreamTy {
1115
1115
return Plugin::success ();
1116
1116
}
1117
1117
1118
+ // / Complete pending post actions until and including the event in target
1119
+ // / slot.
1120
+ Error completeUntil (uint32_t TargetSlot) {
1121
+ for (uint32_t Slot = 0 ; Slot <= TargetSlot; ++Slot) {
1122
+ // Take the post action of the operation if any.
1123
+ if (auto Err = Slots[Slot].performAction ())
1124
+ return Err;
1125
+ }
1126
+
1127
+ return Plugin::success ();
1128
+ }
1129
+
1118
1130
// / Make the current stream wait on a specific operation of another stream.
1119
1131
// / The idea is to make the current stream waiting on two signals: 1) the last
1120
1132
// / signal of the current stream, and 2) the last signal of the other stream.
@@ -1502,6 +1514,11 @@ struct AMDGPUStreamTy {
1502
1514
return complete ();
1503
1515
}
1504
1516
1517
+ /// Synchronize the stream up to the given event. The calling thread waits
1518
+ /// until the operation recorded by \p Event has finished, and then performs
1519
+ /// the pending post actions of that operation and of all earlier ones.
1520
+ Error synchronizeOn (AMDGPUEventTy &Event);
1521
+
1505
1522
// / Query the stream and complete pending post actions if operations finished.
1506
1523
// / Return whether all the operations completed. This operation does not block
1507
1524
// / the calling thread.
@@ -1575,6 +1592,21 @@ struct AMDGPUEventTy {
1575
1592
return Stream.waitEvent (*this );
1576
1593
}
1577
1594
1595
+ Error sync () {
1596
+ std::lock_guard<std::mutex> Lock (Mutex);
1597
+
1598
+ if (!RecordedStream)
1599
+ return Plugin::error (ErrorCode::INVALID_ARGUMENT,
1600
+ " event does not have any recorded stream" );
1601
+
1602
+ // No need to wait on anything, the recorded stream already finished the
1603
+ // corresponding operation.
1604
+ if (RecordedSlot < 0 )
1605
+ return Plugin::success ();
1606
+
1607
+ return RecordedStream->synchronizeOn (*this );
1608
+ }
1609
+
1578
1610
protected:
1579
1611
// / The stream registered in this event.
1580
1612
AMDGPUStreamTy *RecordedStream;
@@ -1630,6 +1662,22 @@ Error AMDGPUStreamTy::waitEvent(const AMDGPUEventTy &Event) {
1630
1662
return waitOnStreamOperation (RecordedStream, Event.RecordedSlot );
1631
1663
}
1632
1664
1665
+ Error AMDGPUStreamTy::synchronizeOn (AMDGPUEventTy &Event) {
1666
+ std::lock_guard<std::mutex> Lock (Mutex);
1667
+
1668
+ // Wait until the requested slot has completed
1669
+ if (auto Err = Slots[Event.RecordedSlot ].Signal ->wait (
1670
+ StreamBusyWaitMicroseconds, &Device))
1671
+ return Err;
1672
+
1673
+ // If the event is the last one in the stream, just do a full finalize
1674
+ if (Event.RecordedSlot == last ())
1675
+ return complete ();
1676
+
1677
+ // Otherwise, only finalize until the appropriate event
1678
+ return completeUntil (Event.RecordedSlot );
1679
+ }
1680
+
1633
1681
struct AMDGPUStreamManagerTy final
1634
1682
: GenericDeviceResourceManagerTy<AMDGPUResourceRef<AMDGPUStreamTy>> {
1635
1683
using ResourceRef = AMDGPUResourceRef<AMDGPUStreamTy>;
@@ -2540,8 +2588,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
2540
2588
2541
2589
// / Synchronize the current thread with the event.
2542
2590
Error syncEventImpl (void *EventPtr) override {
2543
- return Plugin::error (ErrorCode::UNIMPLEMENTED,
2544
- " synchronize event not implemented " );
2591
+ AMDGPUEventTy *Event = reinterpret_cast <AMDGPUEventTy *>(EventPtr);
2592
+ return Event-> sync ( );
2545
2593
}
2546
2594
2547
2595
// / Print information about the device.
0 commit comments