Skip to content

Commit c8919d8

Browse files
authored
Merge pull request #10821 from typhoonzero/use_pinned_memory
send use pinned memory
2 parents 1153144 + 8a49a88 commit c8919d8

File tree

1 file changed

+9
-8
lines changed

1 file changed

+9
-8
lines changed

paddle/fluid/operators/detail/sendrecvop_utils.cc

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -58,12 +58,13 @@ void GetTensorPayload(framework::Variable* var,
5858
if (platform::is_gpu_place(ctx.GetPlace())) {
5959
#ifdef PADDLE_WITH_CUDA
6060
PADDLE_ENFORCE(platform::is_gpu_place(tensor.place()));
61-
platform::CPUPlace cpu;
61+
platform::CUDAPinnedPlace cuda_pinned;
6262
auto& gpu_dev_ctx = static_cast<const platform::CUDADeviceContext&>(ctx);
6363
auto copy_size = tensor.numel() * framework::SizeOfType(tensor.type());
64-
*payload = memory::Alloc(cpu, copy_size);
64+
*payload = memory::Alloc(cuda_pinned, copy_size);
6565

66-
memory::Copy(cpu, *payload, boost::get<platform::CUDAPlace>(tensor.place()),
66+
memory::Copy(cuda_pinned, *payload,
67+
boost::get<platform::CUDAPlace>(tensor.place()),
6768
reinterpret_cast<const void*>(tensor.data<void>()), copy_size,
6869
gpu_dev_ctx.stream());
6970
ctx.Wait();
@@ -90,11 +91,11 @@ void GetSelectedRowsPayload(framework::Variable* var,
9091
auto* tensor = slr->mutable_value();
9192
if (platform::is_gpu_place(ctx.GetPlace())) {
9293
#ifdef PADDLE_WITH_CUDA
93-
platform::CPUPlace cpu;
94+
platform::CUDAPinnedPlace cuda_pinned;
9495
auto& gpu_dev_ctx = static_cast<const platform::CUDADeviceContext&>(ctx);
9596
auto copy_size = tensor->numel() * framework::SizeOfType(tensor->type());
96-
*payload = memory::Alloc(cpu, copy_size);
97-
memory::Copy(cpu, *payload,
97+
*payload = memory::Alloc(cuda_pinned, copy_size);
98+
memory::Copy(cuda_pinned, *payload,
9899
boost::get<platform::CUDAPlace>(tensor->place()),
99100
reinterpret_cast<const void*>(tensor->data<void>()), copy_size,
100101
gpu_dev_ctx.stream());
@@ -145,8 +146,8 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
145146
// GPU data is copied to a pinned CPU buffer when sending,
146147
// free the buffer when possible.
147148
destroy_callback = [](void* backing) {
148-
platform::CPUPlace cpu;
149-
memory::Free(cpu, backing);
149+
platform::CUDAPinnedPlace cuda_pinned;
150+
memory::Free(cuda_pinned, backing);
150151
};
151152
}
152153

0 commit comments

Comments
 (0)