@@ -73,18 +73,12 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
       memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
                    stream);
     } else {
-      // NOTE(zcd): Because TensorCopy is an async operation, when the src_place
-      // and dst_place are two different GPU, to ensure that the operation can
-      // be carried out correctly, we should make ctx wait.
-      // If ctx_place and src_place are the same, we should add ctx.Wait()
-      // after memory::Copy; if ctx_place and dst_place are the same, we should
-      // add ctx.Wait() before memory::Copy.
       if (platform::is_same_place(ctx_place, src_place)) {
         memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
                      stream);
-        ctx.Wait();
+        platform::DeviceContextPool::Instance().Get(src.place())->Wait();
       } else if (platform::is_same_place(ctx_place, dst_place)) {
-        ctx.Wait();
+        platform::DeviceContextPool::Instance().Get(src.place())->Wait();
         memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
                      stream);
       } else {
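The hunk above replaces the bare ctx.Wait() with a wait on the pooled device context of src.place(), ordered after the copy when ctx lives on the source device and before it when ctx lives on the destination. A minimal sketch of those two orderings in plain CUDA runtime calls rather than Paddle's DeviceContext abstraction (the function name and parameters are illustrative assumptions, not Paddle API):

#include <cstddef>
#include <cuda_runtime.h>

// Illustrative sketch, not Paddle API: synchronize a cross-GPU async copy
// depending on which device owns the stream the copy is enqueued on.
void CopyAcrossGpus(void* dst, int dst_dev, const void* src, int src_dev,
                    size_t size, cudaStream_t copy_stream,
                    cudaStream_t src_stream, bool copy_on_src) {
  if (copy_on_src) {
    // The copy runs on the source device's stream: enqueue it, then wait,
    // so the destination device only ever sees completed data.
    cudaMemcpyPeerAsync(dst, dst_dev, src, src_dev, size, copy_stream);
    cudaStreamSynchronize(copy_stream);
  } else {
    // The copy runs on the destination device's stream: drain the source
    // device's stream first, so the bytes being read are already valid.
    cudaStreamSynchronize(src_stream);
    cudaMemcpyPeerAsync(dst, dst_dev, src, src_dev, size, copy_stream);
  }
}

One plausible reading of the change itself: waiting on the source place's pooled context also orders the copy against producer work enqueued on that device's default stream, not only against the caller-supplied ctx.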
@@ -97,13 +91,6 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
 
 void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                 Tensor* dst) {
-  // NOTE(zcd): If the src.place() and dst_place are two different GPU,
-  // the copy operation is carried out on the dst_place's stream. This is
-  // very important, because TensorCopy is an async operator, and in most
-  // case, once this copy operator returns, dst is to be used in dst_place's
-  // stream, if this copy operation is carried out on the src_place's stream,
-  // when dst is used in dst_place's stream the copy operation may be
-  // not completed.
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   const platform::DeviceContext* dev_ctx;
   if (platform::is_gpu_place(dst_place)) {
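The NOTE deleted in this second hunk recorded why the two-argument overload picks the destination place's context for GPU targets: the copy is enqueued on dst_place's stream, so later work on that same stream is automatically ordered after it. A hedged CUDA sketch of that stream-ordering argument (function, kernel, and names are hypothetical):

#include <cstddef>
#include <cuda_runtime.h>

// Illustrative sketch, not Paddle API: enqueueing the copy on the
// destination device's stream orders it before any later work there.
void CopyThenConsume(float* dst, int dst_dev, const float* src, int src_dev,
                     size_t bytes, cudaStream_t dst_stream) {
  // The copy is asynchronous, but it is enqueued on dst_stream ...
  cudaMemcpyPeerAsync(dst, dst_dev, src, src_dev, bytes, dst_stream);
  // ... so a kernel launched afterwards on the same stream runs strictly
  // after the copy completes; no explicit synchronization is needed:
  // consume_kernel<<<grid, block, 0, dst_stream>>>(dst);  // hypothetical
}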