Skip to content

Commit 1185a96

Browse files
authored
[cherry-pick 1.8]fix randomly hang issue of PaddleDetection training task on windows (#24980)
* cherry-pick #24977
1 parent 9fd1dd0 commit 1185a96

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

paddle/fluid/memory/memcpy.cc

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,18 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
3232
#ifdef PADDLE_WITH_CUDA
3333
static constexpr size_t kMaxGpuAsyncCopyBytes = 64 * 1024; // 64K
3434

35+
// Waits for the work queued on CUDA stream 0 to finish.
//
// On non-Windows builds this is a single blocking cudaStreamSynchronize(0)
// call. On Windows the stream is polled with cudaStreamQuery(0) instead; the
// commit that introduced this helper did so to fix a random hang of training
// tasks on Windows.
//
// NOTE: the CUDA status code is discarded on both paths, so a failure is not
// reported to the caller.
inline void SyncCUDAStream() {
#if defined(_WIN32)
  cudaError_t status = cudaSuccess;
  // Keep polling while the stream still has pending work; stop on success
  // or on any status other than cudaErrorNotReady.
  do {
    status = cudaStreamQuery(0);
  } while (status == cudaErrorNotReady);
#else
  cudaStreamSynchronize(0);
#endif
}
46+
3547
// NOTE(zcd): Avoid using GpuMemcpySync whenever possible,
3648
// because GpuMemcpySync issues the copying command to the default stream,
3749
// which prevents two commands from different streams from running concurrently.
@@ -55,7 +67,7 @@ void Copy<platform::CPUPlace, platform::CUDAPlace>(
5567
platform::GpuMemcpySync(dst, src, num, cudaMemcpyDeviceToHost);
5668
// FIXME(zjl): do we really need it?
5769
if (num <= kMaxGpuAsyncCopyBytes) {
58-
cudaStreamSynchronize(0);
70+
SyncCUDAStream();
5971
}
6072
}
6173
}
@@ -77,7 +89,7 @@ void Copy<platform::CUDAPlace, platform::CPUPlace>(
7789
platform::GpuMemcpySync(dst, src, num, cudaMemcpyHostToDevice);
7890
// FIXME(zjl): do we really need it?
7991
if (num <= kMaxGpuAsyncCopyBytes) {
80-
cudaStreamSynchronize(0);
92+
SyncCUDAStream();
8193
}
8294
}
8395
}

0 commit comments

Comments
 (0)