
Commit d10b8ef

Author: Yan Xu

Merge pull request #14152 from Yancey1989/add_fused_broadcast_unittest

add fused broadcast op unit test

2 parents c21597c + 6bfa6a0, commit d10b8ef

File tree: 4 files changed, +438 -221 lines


paddle/fluid/framework/details/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -56,6 +56,7 @@ cc_library(scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_execu
 # device_context reduce_op_handle )
 cc_library(fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executor.cc
         DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool device_context)
+cc_test(fused_broadcast_op_test SRCS fused_broadcast_op_handle_test.cc DEPS fused_broadcast_op_handle)
 
 cc_library(build_strategy SRCS build_strategy.cc DEPS
         graph_viz_pass multi_devices_graph_pass
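The added cc_test line declares a new test executable, fused_broadcast_op_test, built from fused_broadcast_op_handle_test.cc and linked against the fused_broadcast_op_handle library. That source file is one of this commit's four changed files but is not reproduced in this excerpt; as a hedged sketch only, its outer skeleton presumably reuses the test fixture that the next diff moves into a shared header. All names below other than the header path and the fixture methods are illustrative assumptions, not taken from the commit.

// Assumed skeleton of a file the fused_broadcast_op_test target could compile;
// the actual fused_broadcast_op_handle_test.cc is not shown in this excerpt.
#include "gtest/gtest.h"
#include "paddle/fluid/framework/details/broadcast_op_handle_test.h"  // shared fixture

namespace paddle {
namespace framework {
namespace details {

TEST(FusedBroadcastSketch, CompilesAgainstSharedFixture) {
  // Only exercises the shared fixture's CPU setup; constructing the actual
  // FusedBroadcastOpHandle is not shown in this excerpt, so it is omitted here.
  TestBroadcastOpHandle test_op;
  test_op.InitCtxOnGpu(false);
}

}  // namespace details
}  // namespace framework
}  // namespace paddle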

paddle/fluid/framework/details/broadcast_op_handle_test.cc

Lines changed: 1 addition & 221 deletions
@@ -12,232 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/details/broadcast_op_handle.h"
-#include "gtest/gtest.h"
-
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/framework/details/broadcast_op_handle_test.h"
 
 namespace paddle {
 namespace framework {
 namespace details {
 
-namespace f = paddle::framework;
-namespace p = paddle::platform;
-
-// test data amount
-const f::DDim kDims = {20, 20};
-
-struct TestBroadcastOpHandle {
-  std::vector<std::unique_ptr<p::DeviceContext>> ctxs_;
-  std::vector<Scope*> local_scopes_;
-  std::vector<Scope*> param_scopes_;
-  Scope g_scope_;
-  std::unique_ptr<OpHandleBase> op_handle_;
-  std::vector<std::unique_ptr<VarHandleBase>> vars_;
-  std::vector<p::Place> gpu_list_;
-  bool use_gpu_;
-#ifdef PADDLE_WITH_CUDA
-  std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
-#endif
-
-  void WaitAll() {
-    for (size_t j = 0; j < ctxs_.size(); ++j) {
-      ctxs_[j]->Wait();
-    }
-#ifdef PADDLE_WITH_CUDA
-    if (nccl_ctxs_) {
-      nccl_ctxs_->WaitAll();
-    }
-#endif
-  }
-
-  void InitCtxOnGpu(bool use_gpu) {
-    use_gpu_ = use_gpu;
-    if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
-      int count = p::GetCUDADeviceCount();
-      if (count <= 1) {
-        LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA "
-                        "device count is "
-                     << count;
-        exit(0);
-      }
-      for (int i = 0; i < count; ++i) {
-        auto p = p::CUDAPlace(i);
-        gpu_list_.push_back(p);
-        ctxs_.emplace_back(new p::CUDADeviceContext(p));
-      }
-      nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
-#else
-      PADDLE_THROW("CUDA is not support.");
-#endif
-    } else {
-      int count = 8;
-      for (int i = 0; i < count; ++i) {
-        auto p = p::CPUPlace();
-        gpu_list_.push_back(p);
-        ctxs_.emplace_back(new p::CPUDeviceContext(p));
-      }
-#ifdef PADDLE_WITH_CUDA
-      nccl_ctxs_.reset(nullptr);
-#endif
-    }
-  }
-
-  void InitBroadcastOp(size_t input_scope_idx) {
-    for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      local_scopes_.push_back(&(g_scope_.NewScope()));
-      Scope& local_scope = local_scopes_.back()->NewScope();
-      *local_scopes_.back()
-           ->Var(details::kLocalExecScopeName)
-           ->GetMutable<Scope*>() = &local_scope;
-      local_scope.Var("out");
-      param_scopes_.emplace_back(&local_scope);
-    }
-    param_scopes_[input_scope_idx]->Var("input");
-
-    std::unique_ptr<ir::Node> n =
-        ir::CreateNodeForTest("node0", ir::Node::Type::kOperation);
-    if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
-      op_handle_.reset(new BroadcastOpHandle(n.get(), local_scopes_, gpu_list_,
-                                             nccl_ctxs_.get()));
-#else
-      PADDLE_THROW("CUDA is not support.");
-#endif
-    } else {
-#ifdef PADDLE_WITH_CUDA
-      op_handle_.reset(new BroadcastOpHandle(n.get(), local_scopes_, gpu_list_,
-                                             nccl_ctxs_.get()));
-#else
-      op_handle_.reset(
-          new BroadcastOpHandle(n.get(), local_scopes_, gpu_list_));
-#endif
-    }
-
-    std::unique_ptr<ir::Node> v =
-        ir::CreateNodeForTest("node1", ir::Node::Type::kVariable);
-    auto* in_var_handle = new VarHandle(v.get(), 1, input_scope_idx, "input",
-                                        gpu_list_[input_scope_idx]);
-    vars_.emplace_back(in_var_handle);
-    op_handle_->AddInput(in_var_handle);
-
-    // add dummy var
-
-    std::unique_ptr<ir::Node> v2 =
-        ir::CreateNodeForTest("node2", ir::Node::Type::kVariable);
-    vars_.emplace_back(new DummyVarHandle(v2.get()));
-    DummyVarHandle* dummy_var_handle =
-        static_cast<DummyVarHandle*>(vars_.back().get());
-    dummy_var_handle->ClearGeneratedOp();
-    op_handle_->AddInput(dummy_var_handle);
-
-    for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      if (!use_gpu_) {
-        op_handle_->SetDeviceContext(gpu_list_[j], ctxs_[j].get());
-      }
-      std::unique_ptr<ir::Node> v3 =
-          ir::CreateNodeForTest("node3", ir::Node::Type::kVariable);
-      VarHandle* out_var_handle =
-          new VarHandle(v3.get(), 2, j, "out", gpu_list_[j]);
-      vars_.emplace_back(out_var_handle);
-      op_handle_->AddOutput(out_var_handle);
-    }
-
-    // add dummy var
-    std::unique_ptr<ir::Node> v4 =
-        ir::CreateNodeForTest("node4", ir::Node::Type::kVariable);
-    vars_.emplace_back(new DummyVarHandle(v4.get()));
-    DummyVarHandle* out_dummy_var_handle =
-        static_cast<DummyVarHandle*>(vars_.back().get());
-    out_dummy_var_handle->ClearGeneratedOp();
-    op_handle_->AddOutput(out_dummy_var_handle);
-  }
-
-  void TestBroadcastLodTensor(size_t input_scope_idx) {
-    auto in_var = param_scopes_[input_scope_idx]->FindVar("input");
-    PADDLE_ENFORCE_NOT_NULL(in_var);
-    auto in_lod_tensor = in_var->GetMutable<f::LoDTensor>();
-    in_lod_tensor->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
-
-    std::vector<float> send_vector(static_cast<size_t>(f::product(kDims)));
-    for (size_t k = 0; k < send_vector.size(); ++k) {
-      send_vector[k] = k;
-    }
-    f::LoD lod{{0, 10, 20}};
-    paddle::framework::TensorFromVector<float>(
-        send_vector, *(ctxs_[input_scope_idx]), in_lod_tensor);
-    in_lod_tensor->set_lod(lod);
-    in_lod_tensor->Resize(kDims);
-
-    op_handle_->Run(false);
-
-    WaitAll();
-
-    p::CPUPlace cpu_place;
-    for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      auto out_var = param_scopes_[j]->FindVar("out");
-      PADDLE_ENFORCE_NOT_NULL(out_var);
-      auto out_tensor = out_var->Get<f::LoDTensor>();
-      PADDLE_ENFORCE_EQ(out_tensor.lod(), lod, "lod is not equal.");
-
-      f::Tensor result_tensor;
-      f::TensorCopySync(out_tensor, cpu_place, &result_tensor);
-      float* ct = result_tensor.mutable_data<float>(cpu_place);
-
-      for (int64_t i = 0; i < f::product(kDims); ++i) {
-        ASSERT_NEAR(ct[i], send_vector[i], 1e-5);
-      }
-    }
-  }
-
-  void TestBroadcastSelectedRows(size_t input_scope_idx) {
-    auto in_var = param_scopes_[input_scope_idx]->FindVar("input");
-    PADDLE_ENFORCE_NOT_NULL(in_var);
-    auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
-    auto value = in_selected_rows->mutable_value();
-    value->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
-    int height = static_cast<int>(kDims[0]) * 2;
-    std::vector<int64_t> rows{0, 1, 2, 3, 3, 0, 14, 7, 3, 1,
-                              2, 4, 6, 3, 1, 1, 1, 1, 3, 7};
-    in_selected_rows->set_height(height);
-    in_selected_rows->set_rows(rows);
-
-    std::vector<float> send_vector(static_cast<size_t>(f::product(kDims)));
-    for (size_t k = 0; k < send_vector.size(); ++k) {
-      send_vector[k] = k;
-    }
-    paddle::framework::TensorFromVector<float>(
-        send_vector, *(ctxs_[input_scope_idx]), value);
-
-    op_handle_->Run(false);
-
-    WaitAll();
-
-    p::CPUPlace cpu_place;
-    for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      auto out_var = param_scopes_[j]->FindVar("out");
-      PADDLE_ENFORCE_NOT_NULL(out_var);
-      auto& out_select_rows = out_var->Get<f::SelectedRows>();
-      auto rt = out_select_rows.value();
-
-      PADDLE_ENFORCE_EQ(out_select_rows.height(), height,
-                        "height is not equal.");
-      for (size_t k = 0; k < out_select_rows.rows().size(); ++k) {
-        PADDLE_ENFORCE_EQ(out_select_rows.rows()[k], rows[k]);
-      }
-
-      f::Tensor result_tensor;
-      f::TensorCopySync(rt, cpu_place, &result_tensor);
-      float* ct = result_tensor.data<float>();
-
-      for (int64_t i = 0; i < f::product(kDims); ++i) {
-        ASSERT_NEAR(ct[i], send_vector[i], 1e-5);
-      }
-    }
-  }
-};
-
 TEST(BroadcastTester, TestCPUBroadcastTestLodTensor) {
   TestBroadcastOpHandle test_op;
   size_t input_scope_idx = 0;
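The diff excerpt ends partway through the first remaining test case. For orientation only, the general pattern for driving the fixture, based solely on the InitCtxOnGpu, InitBroadcastOp, and TestBroadcastLodTensor methods shown in the removed block above, is sketched here; this lives in the same namespace as the fixture and is not the verbatim remainder of the file.

// Sketch of the fixture's driving pattern, not the verbatim file contents.
TEST(BroadcastTester, TestCPUBroadcastTestLodTensor) {
  TestBroadcastOpHandle test_op;
  size_t input_scope_idx = 0;
  test_op.InitCtxOnGpu(false);                      // CPU places and device contexts
  test_op.InitBroadcastOp(input_scope_idx);         // scopes, var handles, op handle
  test_op.TestBroadcastLodTensor(input_scope_idx);  // run and verify the broadcast
}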
