Skip to content

Commit 9fbe90e

Browse files
committed
fix according to comments
1 parent 9365d11 commit 9fbe90e

File tree

1 file changed

+48
-46
lines changed

paddle/fluid/operators/nccl_op_test.cu.cc

Lines changed: 48 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -236,49 +236,51 @@ TEST_F(NCCLTester, ncclReduceOp) {
236236
}
237237

238238
// ncclBcastOp with desc
239-
// TODO(helin): enable the test for ncclBcastOp
240-
// TEST_F(NCCLTester, ncclBcastOp) {
241-
// std::unique_ptr<f::OpDesc> op2(new f::OpDesc);
242-
// const int kRoot = 0;
243-
// op2->SetType("ncclBcast");
244-
// op2->SetInput("X", {"st"});
245-
// op2->SetInput("Communicator", {"comm"});
246-
// op2->SetOutput("Out", {"rt"});
247-
// op2->SetAttr("root", kRoot);
248-
249-
// std::vector<f::Scope *> dev_scopes;
250-
251-
// std::vector<std::thread> ths;
252-
253-
// for (size_t i = 0; i < gpu_list_.size(); ++i) {
254-
// dev_scopes.emplace_back(&g_scope_.NewScope());
255-
// std::thread th(&NCCLTester::PerThreadProgram<float>, this, gpu_list_[i],
256-
// *op2.get(), dev_scopes[i]);
257-
// ths.emplace_back(std::move(th));
258-
// }
259-
260-
// for (size_t i = 0; i < gpu_list_.size(); ++i) {
261-
// ths[i].join();
262-
// }
263-
264-
// const int idx = 1;
265-
// float result = GetGPUData(kRoot);
266-
267-
// p::CPUPlace cpu_place;
268-
// p::CUDAPlace gpu_place(gpu_list_[idx]);
269-
270-
// auto &recv_tensor = dev_scopes[idx]->FindVar("rt")->Get<f::LoDTensor>();
271-
// auto *rt = recv_tensor.data<float>();
272-
// auto *result_tensor = dev_scopes[idx]->Var("ct")->GetMutable<f::LoDTensor>();
273-
// result_tensor->Resize(kDims);
274-
// auto *ct = result_tensor->mutable_data<float>(cpu_place);
275-
276-
// paddle::memory::Copy(
277-
// cpu_place, ct, p::CUDAPlace(gpu_list_[idx]), rt,
278-
// recv_tensor.numel() * sizeof(float),
279-
// static_cast<p::CUDADeviceContext *>(dev_ctxs_[idx])->stream());
280-
281-
// for (int64_t j = 0; j < f::product(kDims); ++j) {
282-
// ASSERT_NEAR(ct[j], result, 1e-5);
283-
// }
284-
// }
239+
// TODO(helin): https://github.com/PaddlePaddle/Paddle/issues/9540
240+
/*
241+
TEST_F(NCCLTester, ncclBcastOp) {
242+
std::unique_ptr<f::OpDesc> op2(new f::OpDesc);
243+
const int kRoot = 0;
244+
op2->SetType("ncclBcast");
245+
op2->SetInput("X", {"st"});
246+
op2->SetInput("Communicator", {"comm"});
247+
op2->SetOutput("Out", {"rt"});
248+
op2->SetAttr("root", kRoot);
249+
250+
std::vector<f::Scope *> dev_scopes;
251+
252+
std::vector<std::thread> ths;
253+
254+
for (size_t i = 0; i < gpu_list_.size(); ++i) {
255+
dev_scopes.emplace_back(&g_scope_.NewScope());
256+
std::thread th(&NCCLTester::PerThreadProgram<float>, this, gpu_list_[i],
257+
*op2.get(), dev_scopes[i]);
258+
ths.emplace_back(std::move(th));
259+
}
260+
261+
for (size_t i = 0; i < gpu_list_.size(); ++i) {
262+
ths[i].join();
263+
}
264+
265+
const int idx = 1;
266+
float result = GetGPUData(kRoot);
267+
268+
p::CPUPlace cpu_place;
269+
p::CUDAPlace gpu_place(gpu_list_[idx]);
270+
271+
auto &recv_tensor = dev_scopes[idx]->FindVar("rt")->Get<f::LoDTensor>();
272+
auto *rt = recv_tensor.data<float>();
273+
auto *result_tensor = dev_scopes[idx]->Var("ct")->GetMutable<f::LoDTensor>();
274+
result_tensor->Resize(kDims);
275+
auto *ct = result_tensor->mutable_data<float>(cpu_place);
276+
277+
paddle::memory::Copy(
278+
cpu_place, ct, p::CUDAPlace(gpu_list_[idx]), rt,
279+
recv_tensor.numel() * sizeof(float),
280+
static_cast<p::CUDADeviceContext *>(dev_ctxs_[idx])->stream());
281+
282+
for (int64_t j = 0; j < f::product(kDims); ++j) {
283+
ASSERT_NEAR(ct[j], result, 1e-5);
284+
}
285+
}
286+
*/

0 commit comments

Comments (0)