 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/fluid/framework/details/broadcast_op_handle.h"
-#include "gtest/gtest.h"
-
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/framework/details/broadcast_op_handle_test.h"

 namespace paddle {
 namespace framework {
 namespace details {

-namespace f = paddle::framework;
-namespace p = paddle::platform;
-
-// test data amount
-const f::DDim kDims = {20, 20};
-
-struct TestBroadcastOpHandle {
-  std::vector<std::unique_ptr<p::DeviceContext>> ctxs_;
-  std::vector<Scope*> local_scopes_;
-  std::vector<Scope*> param_scopes_;
-  Scope g_scope_;
-  std::unique_ptr<OpHandleBase> op_handle_;
-  std::vector<std::unique_ptr<VarHandleBase>> vars_;
-  std::vector<p::Place> gpu_list_;
-  bool use_gpu_;
-#ifdef PADDLE_WITH_CUDA
-  std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
-#endif
-
-  void WaitAll() {
-    for (size_t j = 0; j < ctxs_.size(); ++j) {
-      ctxs_[j]->Wait();
-    }
-#ifdef PADDLE_WITH_CUDA
-    if (nccl_ctxs_) {
-      nccl_ctxs_->WaitAll();
-    }
-#endif
-  }
-
-  void InitCtxOnGpu(bool use_gpu) {
-    use_gpu_ = use_gpu;
-    if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
-      int count = p::GetCUDADeviceCount();
-      if (count <= 1) {
-        LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA "
-                        "device count is "
-                     << count;
-        exit(0);
-      }
-      for (int i = 0; i < count; ++i) {
-        auto p = p::CUDAPlace(i);
-        gpu_list_.push_back(p);
-        ctxs_.emplace_back(new p::CUDADeviceContext(p));
-      }
-      nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
-#else
-      PADDLE_THROW("CUDA is not support.");
-#endif
-    } else {
-      int count = 8;
-      for (int i = 0; i < count; ++i) {
-        auto p = p::CPUPlace();
-        gpu_list_.push_back(p);
-        ctxs_.emplace_back(new p::CPUDeviceContext(p));
-      }
-#ifdef PADDLE_WITH_CUDA
-      nccl_ctxs_.reset(nullptr);
-#endif
-    }
-  }
-
-  void InitBroadcastOp(size_t input_scope_idx) {
-    for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      local_scopes_.push_back(&(g_scope_.NewScope()));
-      Scope& local_scope = local_scopes_.back()->NewScope();
-      *local_scopes_.back()
-           ->Var(details::kLocalExecScopeName)
-           ->GetMutable<Scope*>() = &local_scope;
-      local_scope.Var("out");
-      param_scopes_.emplace_back(&local_scope);
-    }
-    param_scopes_[input_scope_idx]->Var("input");
-
-    std::unique_ptr<ir::Node> n =
-        ir::CreateNodeForTest("node0", ir::Node::Type::kOperation);
-    if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
-      op_handle_.reset(new BroadcastOpHandle(n.get(), local_scopes_, gpu_list_,
-                                             nccl_ctxs_.get()));
-#else
-      PADDLE_THROW("CUDA is not support.");
-#endif
-    } else {
-#ifdef PADDLE_WITH_CUDA
-      op_handle_.reset(new BroadcastOpHandle(n.get(), local_scopes_, gpu_list_,
-                                             nccl_ctxs_.get()));
-#else
-      op_handle_.reset(
-          new BroadcastOpHandle(n.get(), local_scopes_, gpu_list_));
-#endif
-    }
-
-    std::unique_ptr<ir::Node> v =
-        ir::CreateNodeForTest("node1", ir::Node::Type::kVariable);
-    auto* in_var_handle = new VarHandle(v.get(), 1, input_scope_idx, "input",
-                                        gpu_list_[input_scope_idx]);
-    vars_.emplace_back(in_var_handle);
-    op_handle_->AddInput(in_var_handle);
-
-    // add dummy var
-
-    std::unique_ptr<ir::Node> v2 =
-        ir::CreateNodeForTest("node2", ir::Node::Type::kVariable);
-    vars_.emplace_back(new DummyVarHandle(v2.get()));
-    DummyVarHandle* dummy_var_handle =
-        static_cast<DummyVarHandle*>(vars_.back().get());
-    dummy_var_handle->ClearGeneratedOp();
-    op_handle_->AddInput(dummy_var_handle);
-
-    for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      if (!use_gpu_) {
-        op_handle_->SetDeviceContext(gpu_list_[j], ctxs_[j].get());
-      }
-      std::unique_ptr<ir::Node> v3 =
-          ir::CreateNodeForTest("node3", ir::Node::Type::kVariable);
-      VarHandle* out_var_handle =
-          new VarHandle(v3.get(), 2, j, "out", gpu_list_[j]);
-      vars_.emplace_back(out_var_handle);
-      op_handle_->AddOutput(out_var_handle);
-    }
-
-    // add dummy var
-    std::unique_ptr<ir::Node> v4 =
-        ir::CreateNodeForTest("node4", ir::Node::Type::kVariable);
-    vars_.emplace_back(new DummyVarHandle(v4.get()));
-    DummyVarHandle* out_dummy_var_handle =
-        static_cast<DummyVarHandle*>(vars_.back().get());
-    out_dummy_var_handle->ClearGeneratedOp();
-    op_handle_->AddOutput(out_dummy_var_handle);
-  }
-
-  void TestBroadcastLodTensor(size_t input_scope_idx) {
-    auto in_var = param_scopes_[input_scope_idx]->FindVar("input");
-    PADDLE_ENFORCE_NOT_NULL(in_var);
-    auto in_lod_tensor = in_var->GetMutable<f::LoDTensor>();
-    in_lod_tensor->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
-
-    std::vector<float> send_vector(static_cast<size_t>(f::product(kDims)));
-    for (size_t k = 0; k < send_vector.size(); ++k) {
-      send_vector[k] = k;
-    }
-    f::LoD lod{{0, 10, 20}};
-    paddle::framework::TensorFromVector<float>(
-        send_vector, *(ctxs_[input_scope_idx]), in_lod_tensor);
-    in_lod_tensor->set_lod(lod);
-    in_lod_tensor->Resize(kDims);
-
-    op_handle_->Run(false);
-
-    WaitAll();
-
-    p::CPUPlace cpu_place;
-    for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      auto out_var = param_scopes_[j]->FindVar("out");
-      PADDLE_ENFORCE_NOT_NULL(out_var);
-      auto out_tensor = out_var->Get<f::LoDTensor>();
-      PADDLE_ENFORCE_EQ(out_tensor.lod(), lod, "lod is not equal.");
-
-      f::Tensor result_tensor;
-      f::TensorCopySync(out_tensor, cpu_place, &result_tensor);
-      float* ct = result_tensor.mutable_data<float>(cpu_place);
-
-      for (int64_t i = 0; i < f::product(kDims); ++i) {
-        ASSERT_NEAR(ct[i], send_vector[i], 1e-5);
-      }
-    }
-  }
-
-  void TestBroadcastSelectedRows(size_t input_scope_idx) {
-    auto in_var = param_scopes_[input_scope_idx]->FindVar("input");
-    PADDLE_ENFORCE_NOT_NULL(in_var);
-    auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
-    auto value = in_selected_rows->mutable_value();
-    value->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
-    int height = static_cast<int>(kDims[0]) * 2;
-    std::vector<int64_t> rows{0, 1, 2, 3, 3, 0, 14, 7, 3, 1,
-                              2, 4, 6, 3, 1, 1, 1, 1, 3, 7};
-    in_selected_rows->set_height(height);
-    in_selected_rows->set_rows(rows);
-
-    std::vector<float> send_vector(static_cast<size_t>(f::product(kDims)));
-    for (size_t k = 0; k < send_vector.size(); ++k) {
-      send_vector[k] = k;
-    }
-    paddle::framework::TensorFromVector<float>(
-        send_vector, *(ctxs_[input_scope_idx]), value);
-
-    op_handle_->Run(false);
-
-    WaitAll();
-
-    p::CPUPlace cpu_place;
-    for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      auto out_var = param_scopes_[j]->FindVar("out");
-      PADDLE_ENFORCE_NOT_NULL(out_var);
-      auto& out_select_rows = out_var->Get<f::SelectedRows>();
-      auto rt = out_select_rows.value();
-
-      PADDLE_ENFORCE_EQ(out_select_rows.height(), height,
-                        "height is not equal.");
-      for (size_t k = 0; k < out_select_rows.rows().size(); ++k) {
-        PADDLE_ENFORCE_EQ(out_select_rows.rows()[k], rows[k]);
-      }
-
-      f::Tensor result_tensor;
-      f::TensorCopySync(rt, cpu_place, &result_tensor);
-      float* ct = result_tensor.data<float>();
-
-      for (int64_t i = 0; i < f::product(kDims); ++i) {
-        ASSERT_NEAR(ct[i], send_vector[i], 1e-5);
-      }
-    }
-  }
-};
-
 TEST(BroadcastTester, TestCPUBroadcastTestLodTensor) {
   TestBroadcastOpHandle test_op;
   size_t input_scope_idx = 0;
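For context, the excerpt cuts off inside the first TEST body. Below is a minimal sketch of how the TestBroadcastOpHandle fixture above is typically driven: InitCtxOnGpu, InitBroadcastOp, and TestBroadcastLodTensor are the fixture's own methods from the removed block, but the exact call sequence past the truncation point is an assumption, not part of this diff.

    // Sketch only: assumed continuation of the truncated test, exercising the
    // CPU path of the fixture defined above.
    TEST(BroadcastTester, TestCPUBroadcastTestLodTensor) {
      TestBroadcastOpHandle test_op;
      size_t input_scope_idx = 0;
      test_op.InitCtxOnGpu(false);               // sets up 8 CPUPlace device contexts
      test_op.InitBroadcastOp(input_scope_idx);  // builds the op handle and var handles
      test_op.TestBroadcastLodTensor(input_scope_idx);  // runs broadcast, checks outputs
    }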