@@ -36,6 +36,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
36
36
auto size = src.numel () * SizeOfType (src.type ());
37
37
38
38
if (platform::is_cpu_place (src_place) && platform::is_cpu_place (dst_place)) {
39
+ if (src_ptr == dst_ptr) {
40
+ VLOG (3 ) << " Skip copy the same data async from " << src_place << " to "
41
+ << dst_place;
42
+ return ;
43
+ }
39
44
memory::Copy (boost::get<platform::CPUPlace>(dst_place), dst_ptr,
40
45
boost::get<platform::CPUPlace>(src_place), src_ptr, size);
41
46
}
@@ -71,6 +76,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
71
76
auto stream =
72
77
reinterpret_cast <const platform::CUDADeviceContext&>(ctx).stream ();
73
78
if (platform::is_same_place (src_place, dst_place)) {
79
+ if (src_ptr == dst_ptr) {
80
+ VLOG (3 ) << " Skip copy the same data async from " << src_place << " to "
81
+ << dst_place;
82
+ return ;
83
+ }
74
84
memory::Copy (dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
75
85
stream);
76
86
} else {
@@ -115,7 +125,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
115
125
auto size = src.numel () * SizeOfType (src.type ());
116
126
if (platform::is_cpu_place (src_place) && platform::is_cpu_place (dst_place)) {
117
127
if (src_ptr == dst_ptr) {
118
- VLOG (3 ) << " Skip copy the same data from " << src. place () << " to "
128
+ VLOG (3 ) << " Skip copy the same data from " << src_place << " to "
119
129
<< dst_place;
120
130
return ;
121
131
}
@@ -135,14 +145,13 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
135
145
memory::Copy (dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, nullptr );
136
146
} else if (platform::is_gpu_place (src_place) &&
137
147
platform::is_gpu_place (dst_place)) {
138
- auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
139
- auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
140
- if (src_ptr == dst_ptr &&
141
- src_gpu_place.GetDeviceId () == dst_gpu_place.GetDeviceId ()) {
142
- VLOG (3 ) << " Skip copy the same data from " << src.place () << " to "
148
+ if (src_ptr == dst_ptr && platform::is_same_place (src_place, dst_place)) {
149
+ VLOG (3 ) << " Skip copy the same data from " << src_place << " to "
143
150
<< dst_place;
144
151
return ;
145
152
}
153
+ auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
154
+ auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
146
155
memory::Copy (dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr );
147
156
}
148
157
#endif
0 commit comments