File tree Expand file tree Collapse file tree 3 files changed +36
-11
lines changed Expand file tree Collapse file tree 3 files changed +36
-11
lines changed Original file line number Diff line number Diff line change @@ -257,15 +257,30 @@ void Device::destroyAllStreams() {
257257}
258258
259259// ================================================================================================
260- void Device::SyncAllStreams ( bool cpu_wait) {
260+ void Device::SyncAllStreams (bool cpu_wait, bool wait_blocking_streams_only ) {
261261 // Make a local copy to avoid stalls for GPU finish with multiple threads
262262 std::vector<hip::Stream*> streams;
263263 streams.reserve (streamSet.size ());
264264 {
265265 amd::ScopedLock lock (streamSetLock);
266- for (auto it : streamSet) {
267- streams.push_back (it);
268- it->retain ();
266+ if (wait_blocking_streams_only) {
267+ auto null_stream = GetNullStream ();
268+ for (auto it : streamSet) {
269+ if (it != null_stream && (it->Flags () & hipStreamNonBlocking) == 0 ) {
270+ streams.push_back (it);
271+ it->retain ();
272+ }
273+ }
274+ // Append the null stream last so that it is waited on only after all blocking streams.
275+ if (null_stream != nullptr ) {
276+ streams.push_back (null_stream);
277+ null_stream->retain ();
278+ }
279+ } else {
280+ for (auto it : streamSet) {
281+ streams.push_back (it);
282+ it->retain ();
283+ }
269284 }
270285 }
271286 for (auto it : streams) {
Original file line number Diff line number Diff line change @@ -595,7 +595,7 @@ class stream_per_thread {
595595
596596 void destroyAllStreams ();
597597
598- void SyncAllStreams ( bool cpu_wait = true );
598+ void SyncAllStreams ( bool cpu_wait = true , bool wait_blocking_streams_only = false );
599599
600600 bool StreamCaptureBlocking ();
601601
Original file line number Diff line number Diff line change @@ -357,13 +357,23 @@ hipError_t hipStreamSynchronize_common(hipStream_t stream) {
357357 HIP_RETURN (hipErrorStreamCaptureUnsupported);
358358 }
359359 }
360- bool wait = (stream == nullptr || stream == hipStreamLegacy) ? true : false ;
361- auto hip_stream = hip::getStream (stream, wait);
362360
363- // Wait for the current host queue
364- hip_stream->finish ();
365- // Release freed memory for all memory pools on the device
366- hip_stream->GetDevice ()->ReleaseFreedMemory ();
361+ if (stream == nullptr ) {
362+ // CPU-wait on the null stream and on blocking streams only; non-blocking streams are skipped
363+ constexpr bool WaitblockingStreamOnly = true ;
364+ getCurrentDevice ()->SyncAllStreams (true , WaitblockingStreamOnly);
365+
366+ // Release freed memory for all memory pools on the device
367+ getCurrentDevice ()->ReleaseFreedMemory ();
368+ } else {
369+ constexpr bool wait = false ;
370+ auto hip_stream = hip::getStream (stream, wait);
371+
372+ // Wait for the current host queue
373+ hip_stream->finish ();
374+ // Release freed memory for all memory pools on the device
375+ hip_stream->GetDevice ()->ReleaseFreedMemory ();
376+ }
367377 return hipSuccess;
368378}
369379
You can’t perform that action at this time.
0 commit comments