@@ -44,7 +44,9 @@ struct TestReduceOpHandle {
       ctxs_[j]->Wait();
     }
 #ifdef PADDLE_WITH_CUDA
-    nccl_ctxs_->WaitAll();
+    if (nccl_ctxs_) {
+      nccl_ctxs_->WaitAll();
+    }
 #endif
   }
 
@@ -64,6 +66,7 @@ struct TestReduceOpHandle {
         gpu_list_.push_back(p);
         ctxs_.emplace_back(new p::CUDADeviceContext(p));
       }
+      nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
 #else
       PADDLE_THROW("CUDA is not support.");
 #endif
@@ -74,10 +77,10 @@ struct TestReduceOpHandle {
         gpu_list_.push_back(p);
         ctxs_.emplace_back(new p::CPUDeviceContext(p));
       }
-    }
 #ifdef PADDLE_WITH_CUDA
-    nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
+      nccl_ctxs_.reset(nullptr);
 #endif
+    }
   }
 
   void InitReduceOp(size_t input_scope_idx) {
@@ -87,15 +90,27 @@ struct TestReduceOpHandle {
     }
     local_scopes_[input_scope_idx]->Var("input");
 
+    if (use_gpu_) {
+#ifdef PADDLE_WITH_CUDA
+      op_handle_.reset(
+          new ReduceOpHandle(local_scopes_, gpu_list_, nccl_ctxs_.get()));
+#else
+      PADDLE_THROW("CUDA is not support.");
+#endif
+    } else {
 #ifdef PADDLE_WITH_CUDA
-    op_handle_.reset(new ReduceOpHandle(local_scopes_, gpu_list_, *nccl_ctxs_));
+      op_handle_.reset(
+          new ReduceOpHandle(local_scopes_, gpu_list_, nccl_ctxs_.get()));
 #else
-    op_handle_.reset(new ReduceOpHandle(local_scopes_, gpu_list_));
+      op_handle_.reset(new ReduceOpHandle(local_scopes_, gpu_list_));
 #endif
+    }
 
     // add input
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j].get();
+      if (!use_gpu_) {
+        op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j].get();
+      }
       vars_.emplace_back(new VarHandle());
       VarHandle *in_var_handle = static_cast<VarHandle *>(vars_.back().get());
       in_var_handle->place_ = gpu_list_[j];
@@ -236,25 +251,31 @@ TEST(ReduceTester, TestCPUReduceTestSelectedRows) {
   test_op.InitReduceOp(input_scope_idx);
   test_op.TestReduceSelectedRows(input_scope_idx);
 }
+TEST(ReduceTester, TestCPUReduceTestLodTensor) {
+  TestReduceOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(false);
+  test_op.InitReduceOp(input_scope_idx);
+  test_op.TestReduceLodTensors(input_scope_idx);
+}
+#ifdef PADDLE_WITH_CUDA
 
-// #ifdef PADDLE_WITH_CUDA
-//
-// TEST(ReduceTester, TestGPUReduceTestSelectedRows) {
-//   TestReduceOpHandle test_op;
-//   size_t input_scope_idx = 0;
-//   test_op.InitCtxOnGpu(true);
-//   test_op.InitReduceOp(input_scope_idx);
-//   test_op.TestReduceSelectedRows(input_scope_idx);
-// }
-//
-// TEST(ReduceTester, TestCPUReduceTestLodTensor) {
-//   TestReduceOpHandle test_op;
-//   size_t input_scope_idx = 0;
-//   test_op.InitCtxOnGpu(true);
-//   test_op.InitReduceOp(input_scope_idx);
-//   test_op.TestReduceLodTensors(input_scope_idx);
-// }
-// #endif
+TEST(ReduceTester, TestGPUReduceTestSelectedRows) {
+  TestReduceOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(true);
+  test_op.InitReduceOp(input_scope_idx);
+  test_op.TestReduceSelectedRows(input_scope_idx);
+}
+
+TEST(ReduceTester, TestGPUReduceTestLodTensor) {
+  TestReduceOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(true);
+  test_op.InitReduceOp(input_scope_idx);
+  test_op.TestReduceLodTensors(input_scope_idx);
+}
+#endif
 
 }  // namespace details
 }  // namespace framework
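
Reviewer note: the heart of this patch is that nccl_ctxs_ may now be null when the test runs on CPU, so consumers take it as a raw pointer (nccl_ctxs_.get() rather than *nccl_ctxs_) and every use is guarded. A minimal standalone sketch of that idiom, using a hypothetical stand-in type rather than the real platform::NCCLContextMap API:

#include <memory>

// Stand-in for platform::NCCLContextMap; only the null-guard idiom matters.
struct FakeNcclContextMap {
  void WaitAll() {}  // would block until all per-device NCCL streams finish
};

struct Ctx {
  std::unique_ptr<FakeNcclContextMap> nccl_ctxs_;  // null on CPU-only runs

  void InitCtx(bool use_gpu) {
    if (use_gpu) {
      nccl_ctxs_.reset(new FakeNcclContextMap());  // GPU path: real map
    } else {
      nccl_ctxs_.reset(nullptr);  // CPU path: explicitly null
    }
  }

  void WaitAll() {
    if (nccl_ctxs_) {  // guard every dereference, as the patch does
      nccl_ctxs_->WaitAll();
    }
  }
};

int main() {
  Ctx cpu_ctx;
  cpu_ctx.InitCtx(false);
  cpu_ctx.WaitAll();  // safe: the guard skips the null map
  return 0;
}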