
Commit 3db9fad

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix_vlog
test=develop
2 parents: 3da43dc + 387610a

37 files changed: +879, -213 lines

paddle/fluid/API.spec

Lines changed: 1 addition & 0 deletions
@@ -174,6 +174,7 @@ paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None
 paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None))
 paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.space_to_depth ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.affine_grid ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None))

paddle/fluid/framework/details/broadcast_op_handle_test.h

Lines changed: 27 additions & 25 deletions
@@ -37,8 +37,9 @@ struct TestBroadcastOpHandle {
   std::vector<Scope*> local_scopes_;
   std::vector<Scope*> param_scopes_;
   Scope g_scope_;
-  std::unique_ptr<OpHandleBase> op_handle_;
-  std::vector<std::unique_ptr<VarHandleBase>> vars_;
+  OpHandleBase* op_handle_;
+  std::vector<VarHandleBase*> vars_;
+  std::vector<std::unique_ptr<ir::Node>> nodes_;
   std::vector<p::Place> place_list_;
   bool use_gpu_;
 #ifdef PADDLE_WITH_CUDA
@@ -90,6 +91,7 @@ struct TestBroadcastOpHandle {
   }

   void InitBroadcastOp(size_t input_scope_idx) {
+    nodes_.clear();
     for (size_t j = 0; j < place_list_.size(); ++j) {
       local_scopes_.push_back(&(g_scope_.NewScope()));
       Scope& local_scope = local_scopes_.back()->NewScope();
@@ -101,60 +103,60 @@ struct TestBroadcastOpHandle {
     }
     param_scopes_[input_scope_idx]->Var("input");

-    std::unique_ptr<ir::Node> n =
-        ir::CreateNodeForTest("node0", ir::Node::Type::kOperation);
+    nodes_.emplace_back(
+        ir::CreateNodeForTest("node0", ir::Node::Type::kOperation));
     if (use_gpu_) {
 #ifdef PADDLE_WITH_CUDA
-      op_handle_.reset(new BroadcastOpHandle(n.get(), local_scopes_,
-                                             place_list_, nccl_ctxs_.get()));
+      op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
+                                         place_list_, nccl_ctxs_.get());
 #else
       PADDLE_THROW("CUDA is not support.");
 #endif
     } else {
 #ifdef PADDLE_WITH_CUDA
-      op_handle_.reset(new BroadcastOpHandle(n.get(), local_scopes_,
-                                             place_list_, nccl_ctxs_.get()));
+      op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
+                                         place_list_, nccl_ctxs_.get());
 #else
-      op_handle_.reset(
-          new BroadcastOpHandle(n.get(), local_scopes_, place_list_));
+      op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
+                                         place_list_);
 #endif
     }

-    std::unique_ptr<ir::Node> v =
-        ir::CreateNodeForTest("node1", ir::Node::Type::kVariable);
-    auto* in_var_handle = new VarHandle(v.get(), 1, input_scope_idx, "input",
-                                        place_list_[input_scope_idx]);
+    nodes_.emplace_back(
+        ir::CreateNodeForTest("node1", ir::Node::Type::kVariable));
+    auto* in_var_handle = new VarHandle(nodes_.back().get(), 1, input_scope_idx,
+                                        "input", place_list_[input_scope_idx]);
     vars_.emplace_back(in_var_handle);
     op_handle_->AddInput(in_var_handle);

     // add dummy var

-    std::unique_ptr<ir::Node> v2 =
-        ir::CreateNodeForTest("node2", ir::Node::Type::kVariable);
-    vars_.emplace_back(new DummyVarHandle(v2.get()));
+    nodes_.emplace_back(
+        ir::CreateNodeForTest("node2", ir::Node::Type::kVariable));
+    vars_.emplace_back(new DummyVarHandle(nodes_.back().get()));
     DummyVarHandle* dummy_var_handle =
-        static_cast<DummyVarHandle*>(vars_.back().get());
+        static_cast<DummyVarHandle*>(vars_.back());
     dummy_var_handle->ClearGeneratedOp();
     op_handle_->AddInput(dummy_var_handle);

     for (size_t j = 0; j < place_list_.size(); ++j) {
       if (!use_gpu_) {
         op_handle_->SetDeviceContext(place_list_[j], ctxs_[j].get());
       }
-      std::unique_ptr<ir::Node> v3 =
-          ir::CreateNodeForTest("node3", ir::Node::Type::kVariable);
+      nodes_.emplace_back(
+          ir::CreateNodeForTest("node3", ir::Node::Type::kVariable));
       VarHandle* out_var_handle =
-          new VarHandle(v3.get(), 2, j, "out", place_list_[j]);
+          new VarHandle(nodes_.back().get(), 2, j, "out", place_list_[j]);
       vars_.emplace_back(out_var_handle);
       op_handle_->AddOutput(out_var_handle);
     }

     // add dummy var
-    std::unique_ptr<ir::Node> v4 =
-        ir::CreateNodeForTest("node4", ir::Node::Type::kVariable);
-    vars_.emplace_back(new DummyVarHandle(v4.get()));
+    nodes_.emplace_back(
+        ir::CreateNodeForTest("node4", ir::Node::Type::kVariable));
+    vars_.emplace_back(new DummyVarHandle(nodes_.back().get()));
     DummyVarHandle* out_dummy_var_handle =
-        static_cast<DummyVarHandle*>(vars_.back().get());
+        static_cast<DummyVarHandle*>(vars_.back());
     out_dummy_var_handle->ClearGeneratedOp();
     op_handle_->AddOutput(out_dummy_var_handle);
   }
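
A pattern worth noting, since it repeats in every test fixture this commit touches: op_handle_ and vars_ become plain raw pointers (their ownership sits with the graph machinery in production code), while the ir::Node objects the test creates are kept alive in a nodes_ vector of unique_ptrs. The following is a minimal standalone C++ sketch of that ownership split; Node, Handle, HandleOwner and TestFixture are made-up stand-ins for illustration, not Paddle types.

#include <memory>
#include <string>
#include <vector>

// Stand-in for ir::Node: something the fixture must keep alive itself.
struct Node {
  explicit Node(std::string n) : name(std::move(n)) {}
  std::string name;
};

// Stand-in for the handle classes: observes a Node it does not own.
struct Handle {
  explicit Handle(Node* node) : node(node) {}
  Node* node;
};

// Plays the role the graph plays in the real code: it owns the handles.
struct HandleOwner {
  std::vector<std::unique_ptr<Handle>> handles;
  Handle* Create(Node* node) {
    handles.emplace_back(new Handle(node));
    return handles.back().get();
  }
};

// Mirrors the reworked fixtures: nodes_ owns the nodes, vars_ only observes.
struct TestFixture {
  HandleOwner owner_;
  std::vector<std::unique_ptr<Node>> nodes_;
  std::vector<Handle*> vars_;

  void Init() {
    nodes_.clear();
    nodes_.emplace_back(new Node("node0"));
    vars_.emplace_back(owner_.Create(nodes_.back().get()));
  }
};

int main() {
  TestFixture fixture;
  fixture.Init();
  return fixture.vars_.front()->node->name == "node0" ? 0 : 1;
}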

paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc

Lines changed: 10 additions & 8 deletions
@@ -16,6 +16,7 @@
 #include <vector>
 #include "paddle/fluid/framework/details/fetch_op_handle.h"
 #include "paddle/fluid/framework/details/multi_devices_helper.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"

 namespace paddle {
 namespace framework {
@@ -32,13 +33,11 @@ FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor(
       pool_(strategy.num_threads_ +
             1),  // add one more thread for generate op_deps
       fetch_ctxs_(places) {
-  auto &ops = graph_->Get<details::GraphOps>("ops");
-
-  for (auto &op : ops) {
+  for (auto &op : ir::FilterByNodeWrapper<OpHandleBase>(*graph_)) {
     int dep = static_cast<int>(op->NotReadyInputSize());
-    op_deps_.emplace(op.get(), dep);
+    op_deps_.emplace(op, dep);
     if (dep == 0) {
-      bootstrap_ops_.emplace_back(op.get());
+      bootstrap_ops_.emplace_back(op);
     }
   }

@@ -54,13 +53,13 @@ FeedFetchList FastThreadedSSAGraphExecutor::Run(
   paddle::framework::FeedFetchList fetches;
   fetches.resize(fetch_tensors.size());
   std::unordered_map<std::string, std::vector<VarHandleBase *>> fetched_vars;
-  std::vector<std::unique_ptr<FetchOpHandle>> fetch_ops;
+  std::vector<FetchOpHandle *> fetch_ops;

   for (auto &fetch_var_name : fetch_tensors) {
     for (auto &var_map : graph_->Get<details::GraphVars>("vars")) {
       auto it = var_map.find(fetch_var_name);
       if (it != var_map.end()) {
-        fetched_vars[fetch_var_name].push_back(it->second.rbegin()->get());
+        fetched_vars[fetch_var_name].push_back(*it->second.rbegin());
       }
     }
   }
@@ -110,7 +109,10 @@ FeedFetchList FastThreadedSSAGraphExecutor::Run(
           complete_q->Pop();
         }
       }
-      exception_.ReThrow();
+      if (exception_.IsCaught()) {
+        ClearFetchOp(graph_.get(), &fetch_ops);
+        exception_.ReThrow();
+      }
     }
     num_complete += num_comp;
   }
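
Two things change in this executor. First, op handles are no longer read from a stored "ops" graph attribute; they are recovered from the graph's nodes through ir::FilterByNodeWrapper<OpHandleBase>(*graph_), so the executor works on raw, graph-owned pointers. Second, a caught exception now clears the temporary fetch ops (ClearFetchOp) before re-throwing instead of re-throwing unconditionally. As a rough standalone model of the filtering idea only (simplified made-up types, not Paddle's actual FilterByNodeWrapper implementation), collecting every handle of a given type wrapped by some node could look like this:

#include <cassert>
#include <memory>
#include <vector>

// Simplified stand-ins: a Node may wrap an arbitrary handle object.
struct HandleBase {
  virtual ~HandleBase() = default;
};
struct OpHandle : HandleBase {};
struct VarHandle : HandleBase {};

struct Node {
  std::unique_ptr<HandleBase> wrapped;  // empty for plain nodes
};

struct Graph {
  std::vector<std::unique_ptr<Node>> nodes;
};

// Collect every wrapped handle of type T, roughly what the calls to
// ir::FilterByNodeWrapper<OpHandleBase> in this commit rely on.
template <typename T>
std::vector<T*> FilterByNodeWrapper(const Graph& graph) {
  std::vector<T*> result;
  for (const auto& node : graph.nodes) {
    if (auto* t = dynamic_cast<T*>(node->wrapped.get())) {
      result.push_back(t);
    }
  }
  return result;
}

int main() {
  Graph g;
  g.nodes.emplace_back(new Node());
  g.nodes.back()->wrapped.reset(new OpHandle());
  g.nodes.emplace_back(new Node());
  g.nodes.back()->wrapped.reset(new VarHandle());
  assert(FilterByNodeWrapper<OpHandle>(g).size() == 1);
  return 0;
}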

paddle/fluid/framework/details/fetch_op_handle.cc

Lines changed: 1 addition & 5 deletions
@@ -28,11 +28,7 @@ FetchOpHandle::FetchOpHandle(ir::Node *node, FeedFetchList *data, size_t offset,
       offset_(offset),
       local_scopes_(local_scopes) {}

-FetchOpHandle::~FetchOpHandle() {
-  for (auto *input_var : inputs_) {
-    input_var->RemoveOutput(this, this->Node());
-  }
-}
+FetchOpHandle::~FetchOpHandle() {}

 void FetchOpHandle::RecordWaitEventOnCtx(platform::DeviceContext *waited_ctx) {
   PADDLE_THROW("Nobody should wait FetchOp. Unexpceted Error");

paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc

Lines changed: 18 additions & 16 deletions
@@ -22,8 +22,10 @@ namespace details {

 struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
   std::vector<std::string> out_varnames_;
+  std::vector<std::unique_ptr<ir::Node>> nodes_;

   void InitFusedBroadcastOp(std::vector<size_t> input_scope_idxes) {
+    nodes_.clear();
     // initialize scope and var
     for (size_t i = 0; i < place_list_.size(); ++i) {
       local_scopes_.push_back(&(g_scope_.NewScope()));
@@ -39,41 +41,41 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
     }

     // create op handle node
-    std::unique_ptr<ir::Node> n =
-        ir::CreateNodeForTest("fused_broadcast", ir::Node::Type::kOperation);
+    nodes_.emplace_back(
+        ir::CreateNodeForTest("fused_broadcast", ir::Node::Type::kOperation));
     if (use_gpu_) {
 #ifdef PADDLE_WITH_CUDA
-      op_handle_.reset(new FusedBroadcastOpHandle(
-          n.get(), local_scopes_, place_list_, nccl_ctxs_.get()));
+      op_handle_ = new FusedBroadcastOpHandle(
+          nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get());
 #else
       PADDLE_THROW("CUDA is not supported.");
 #endif
     } else {
 #ifdef PADDLE_WITH_CUDA
-      op_handle_.reset(new FusedBroadcastOpHandle(
-          n.get(), local_scopes_, place_list_, nccl_ctxs_.get()));
+      op_handle_ = new FusedBroadcastOpHandle(
+          nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get());
 #else
-      op_handle_.reset(
-          new FusedBroadcastOpHandle(n.get(), local_scopes_, place_list_));
+      op_handle_ = new FusedBroadcastOpHandle(nodes_.back().get(),
+                                              local_scopes_, place_list_);
 #endif
     }

     for (size_t i = 0; i < input_scope_idxes.size(); ++i) {
       // add input var handle
-      std::unique_ptr<ir::Node> in_node =
-          ir::CreateNodeForTest("in_node" + i, ir::Node::Type::kVariable);
+      nodes_.emplace_back(
+          ir::CreateNodeForTest("in_node" + i, ir::Node::Type::kVariable));
       VarHandle* in_var_handle =
-          new VarHandle(in_node.get(), 1, input_scope_idxes[i], "in_var" + i,
-                        place_list_[input_scope_idxes[i]]);
+          new VarHandle(nodes_.back().get(), 1, input_scope_idxes[i],
+                        "in_var" + i, place_list_[input_scope_idxes[i]]);
       vars_.emplace_back(in_var_handle);
       op_handle_->AddInput(in_var_handle);

       // add output var handle
       for (size_t j = 0; j < place_list_.size(); ++j) {
-        std::unique_ptr<ir::Node> out_node =
-            ir::CreateNodeForTest("out_node" + i, ir::Node::Type::kVariable);
-        VarHandle* out_var_handle =
-            new VarHandle(out_node.get(), 2, j, "out_var" + i, place_list_[j]);
+        nodes_.emplace_back(
+            ir::CreateNodeForTest("out_node" + i, ir::Node::Type::kVariable));
+        VarHandle* out_var_handle = new VarHandle(
+            nodes_.back().get(), 2, j, "out_var" + i, place_list_[j]);
         vars_.emplace_back(out_var_handle);
         op_handle_->AddOutput(out_var_handle);
       }

paddle/fluid/framework/details/gather_op_handle_test.cc

Lines changed: 19 additions & 17 deletions
@@ -31,9 +31,10 @@ struct TestGatherOpHandle {
   std::vector<Scope*> local_scopes_;
   std::vector<Scope*> param_scopes_;
   Scope g_scope_;
-  std::unique_ptr<OpHandleBase> op_handle_;
-  std::vector<std::unique_ptr<VarHandleBase>> vars_;
+  OpHandleBase* op_handle_;
+  std::vector<VarHandleBase*> vars_;
   std::vector<p::Place> gpu_list_;
+  std::vector<std::unique_ptr<ir::Node>> nodes_;

   void WaitAll() {
     for (size_t j = 0; j < ctxs_.size(); ++j) {
@@ -70,7 +71,7 @@ struct TestGatherOpHandle {
   }

   void InitGatherOp(size_t input_scope_idx) {
-    std::vector<std::unique_ptr<ir::Node>> nodes;
+    nodes_.clear();
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
       local_scopes_.push_back(&(g_scope_.NewScope()));
       Scope& local_scope = local_scopes_.back()->NewScope();
@@ -82,44 +83,45 @@ struct TestGatherOpHandle {
     }
     param_scopes_[input_scope_idx]->Var("out");

-    nodes.emplace_back(
+    nodes_.emplace_back(
         ir::CreateNodeForTest("node", ir::Node::Type::kOperation).release());
-    op_handle_.reset(
-        new GatherOpHandle(nodes.back().get(), local_scopes_, gpu_list_));
+    op_handle_ =
+        new GatherOpHandle(nodes_.back().get(), local_scopes_, gpu_list_);
     // add input
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
       op_handle_->SetDeviceContext(gpu_list_[j], ctxs_[j].get());
-      nodes.emplace_back(
+      nodes_.emplace_back(
           ir::CreateNodeForTest("node1", ir::Node::Type::kVariable).release());
       auto* in_var_handle =
-          new VarHandle(nodes.back().get(), 1, j, "input", gpu_list_[j]);
+          new VarHandle(nodes_.back().get(), 1, j, "input", gpu_list_[j]);
       vars_.emplace_back(in_var_handle);
       op_handle_->AddInput(in_var_handle);
     }

     // add dummy var
-    nodes.emplace_back(
+    nodes_.emplace_back(
         ir::CreateNodeForTest("node2", ir::Node::Type::kVariable).release());
-    vars_.emplace_back(new DummyVarHandle(nodes.back().get()));
+    vars_.emplace_back(new DummyVarHandle(nodes_.back().get()));
     DummyVarHandle* in_dummy_var_handle =
-        static_cast<DummyVarHandle*>(vars_.back().get());
+        static_cast<DummyVarHandle*>(vars_.back());
     in_dummy_var_handle->ClearGeneratedOp();
     op_handle_->AddInput(in_dummy_var_handle);

     // add output
-    nodes.emplace_back(
+    nodes_.emplace_back(
         ir::CreateNodeForTest("node3", ir::Node::Type::kVariable).release());
-    auto* out_var_handle = new VarHandle(nodes.back().get(), 2, input_scope_idx,
-                                         "out", gpu_list_[input_scope_idx]);
+    auto* out_var_handle =
+        new VarHandle(nodes_.back().get(), 2, input_scope_idx, "out",
+                      gpu_list_[input_scope_idx]);
     vars_.emplace_back(out_var_handle);
     op_handle_->AddOutput(out_var_handle);

     // add dummy var
-    nodes.emplace_back(
+    nodes_.emplace_back(
         ir::CreateNodeForTest("node4", ir::Node::Type::kVariable).release());
-    vars_.emplace_back(new DummyVarHandle(nodes.back().get()));
+    vars_.emplace_back(new DummyVarHandle(nodes_.back().get()));
     DummyVarHandle* dummy_var_handle =
-        static_cast<DummyVarHandle*>(vars_.back().get());
+        static_cast<DummyVarHandle*>(vars_.back());
     op_handle_->AddOutput(dummy_var_handle);
   }

paddle/fluid/framework/details/modify_op_lock_and_record_event_pass.cc

Lines changed: 3 additions & 2 deletions
@@ -16,6 +16,7 @@
 #include "paddle/fluid/framework/details/computation_op_handle.h"
 #include "paddle/fluid/framework/details/multi_devices_helper.h"
 #include "paddle/fluid/framework/details/op_graph_view.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"

 namespace paddle {
 namespace framework {
@@ -35,10 +36,10 @@ static bool IsLockAndRecordEventFreeComputationOpHandle(

 std::unique_ptr<ir::Graph> ModifyOpLockAndRecordEventPass::ApplyImpl(
     std::unique_ptr<ir::Graph> ir_graph) const {
-  auto &all_ops = ir_graph->Get<GraphOps>(kGraphOps);
+  auto all_ops = ir::FilterByNodeWrapper<OpHandleBase>(*ir_graph);
   OpGraphView graph_view(all_ops);
   for (auto &op : all_ops) {
-    auto *compute_op = dynamic_cast<ComputationOpHandle *>(op.get());
+    auto *compute_op = dynamic_cast<ComputationOpHandle *>(op);
     if (compute_op == nullptr) continue;
     bool is_lock_and_record_event_free =
         IsLockAndRecordEventFreeComputationOpHandle(compute_op, graph_view);

paddle/fluid/framework/details/multi_devices_graph_check_pass.cc

Lines changed: 7 additions & 8 deletions
@@ -15,6 +15,7 @@
 #include "paddle/fluid/framework/details/multi_devices_graph_check_pass.h"
 #include <string>
 #include "paddle/fluid/framework/ir/graph.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"

 namespace paddle {
 namespace framework {
@@ -36,20 +37,20 @@ bool SSAGraghBuilderWithChecker::IsValidGraph(const ir::Graph *graph) const {
   for (auto &var_map : graph->Get<GraphVars>(kGraphVars)) {
     for (auto &name_pair : var_map) {
       for (auto &version_pair : name_pair.second) {
-        insert_pending_var(version_pair.get());
+        insert_pending_var(version_pair);
       }
     }
   }

   for (auto &var : graph->Get<GraphDepVars>(kGraphDepVars)) {
-    insert_pending_var(var.get());
+    insert_pending_var(var);
   }

-  for (auto &op : graph->Get<GraphOps>(kGraphOps)) {
+  for (OpHandleBase *op : ir::FilterByNodeWrapper<OpHandleBase>(*graph)) {
     if (op->Inputs().empty()) {
-      ready_ops.insert(op.get());
+      ready_ops.insert(op);
     } else {
-      pending_ops.insert({op.get(), op.get()->NoDupInputSize()});
+      pending_ops.insert({op, op->NoDupInputSize()});
     }
   }

@@ -89,6 +90,4 @@ bool SSAGraghBuilderWithChecker::IsValidGraph(const ir::Graph *graph) const {
 REGISTER_PASS(multi_devices_check_pass,
               paddle::framework::details::SSAGraghBuilderWithChecker)
     .RequireGraphAttr(paddle::framework::details::kGraphVars)
-    .RequireGraphAttr(paddle::framework::details::kGraphDepVars)
-    .RequireGraphAttr(paddle::framework::details::kGraphOps)
-    .RequireGraphAttr(paddle::framework::details::kShardedVarDevice);
+    .RequireGraphAttr(paddle::framework::details::kGraphDepVars);
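
For context on the ready_ops / pending_ops bookkeeping visible above: IsValidGraph is essentially a ready/pending sweep over the op dependency graph, where an op becomes ready once all of its inputs are produced and the graph is considered valid only if every op eventually runs. The following is a standalone sketch of that idea with simplified made-up types, not the Paddle implementation.

#include <iostream>
#include <queue>
#include <unordered_map>
#include <vector>

// An op reads some variables and writes some variables (ids in 0..num_vars-1).
struct Op {
  std::vector<int> inputs;
  std::vector<int> outputs;
};

// Ready/pending sweep: ops with no unproduced inputs are ready; running an op
// produces its outputs and may make its consumers ready. Valid iff all ops run.
bool IsValidGraph(const std::vector<Op>& ops, int num_vars) {
  std::vector<int> producer(num_vars, -1);
  for (size_t i = 0; i < ops.size(); ++i) {
    for (int v : ops[i].outputs) producer[v] = static_cast<int>(i);
  }

  std::vector<size_t> pending(ops.size(), 0);
  std::unordered_map<int, std::vector<size_t>> consumers;
  std::queue<size_t> ready_ops;
  for (size_t i = 0; i < ops.size(); ++i) {
    for (int v : ops[i].inputs) {
      consumers[v].push_back(i);
      if (producer[v] != -1) ++pending[i];  // vars with no producer are ready
    }
    if (pending[i] == 0) ready_ops.push(i);
  }

  size_t finished = 0;
  while (!ready_ops.empty()) {
    size_t op = ready_ops.front();
    ready_ops.pop();
    ++finished;
    for (int v : ops[op].outputs) {
      for (size_t consumer : consumers[v]) {
        if (pending[consumer] > 0 && --pending[consumer] == 0) {
          ready_ops.push(consumer);
        }
      }
    }
  }
  return finished == ops.size();  // false: a cycle keeps some op pending
}

int main() {
  // op0 reads var0 (a graph input), writes var1; op1 reads var1, writes var2.
  std::vector<Op> ops = {{{0}, {1}}, {{1}, {2}}};
  std::cout << std::boolalpha << IsValidGraph(ops, 3) << "\n";  // prints true
  return 0;
}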
