
Commit dcaf183

Build SSA graph at the beginning.
1 parent 2b2406e commit dcaf183

File tree: 6 files changed (+150, -26 lines)

paddle/fluid/framework/details/multi_devices_graph_builder.cc

Lines changed: 13 additions & 14 deletions
@@ -221,15 +221,15 @@ std::unique_ptr<Graph> MultiDevSSAGraphBuilder::Apply(
   // forward, backward nodes. E.g. you can't append an forward node
   // at the end of the node list.
   // TODO(panyx0718): FIXME: Needs to sort by forward->backward order.
-  for (auto &node : nodes) {
-    if (node->NodeType() != ir::Node::Type::kOperation) continue;
+  for (ir::Node *node : TopologySortOperationFromInToOut(nodes)) {
+    VLOG(3) << "apply node: " << node->Name() << reinterpret_cast<void *>(node);
     if (boost::get<int>(
             node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) ==
         static_cast<int>(OpRole::kRPC)) {
-      CreateRPCOp(&result, node.get());
-    } else if (IsDistTrainOp(node.get(), send_vars, recv_vars)) {
-      CreateDistTrainOp(&result, node.get());
-    } else if (IsScaleLossOp(node.get())) {
+      CreateRPCOp(&result, node);
+    } else if (IsDistTrainOp(node, send_vars, recv_vars)) {
+      CreateDistTrainOp(&result, node);
+    } else if (IsScaleLossOp(node)) {
       // user can customize loss@grad if not use_default_grad_scale_
       if (strategy_.gradient_scale_ !=
           BuildStrategy::GradientScaleStrategy::kCustomized) {
@@ -240,10 +240,11 @@ std::unique_ptr<Graph> MultiDevSSAGraphBuilder::Apply(
       // It also assumes backward op will always follow the forward op in
       // the block.
       is_forwarding = false;
+      LOG(ERROR) << "forward flipping!!!!!!!";
     } else {
-      int op_dev_id = GetOpDeviceID(node.get());
+      int op_dev_id = GetOpDeviceID(node);
       if (op_dev_id != -1) {  // This op only runs on one specific device.
-        CreateComputationalOp(&result, node.get(), op_dev_id);
+        CreateComputationalOp(&result, node, op_dev_id);
         for (ir::Node *n : node->outputs) {
           var_name_on_devices_.emplace(n->Name(), op_dev_id);
         }
@@ -252,13 +253,11 @@ std::unique_ptr<Graph> MultiDevSSAGraphBuilder::Apply(
         // gradients.
         if (node->Op()->Type() == "read" && strategy_.enable_data_balance_) {
           node->Op()->SetAttr("throw_eof_exp", false);
-          CreateComputationalOps(&result, node.get(), places_.size());
-          // TODO(paddle-dev): builder shouldn't depend on the out logic of
-          // a specific op.
+          CreateComputationalOps(&result, node, places_.size());
           const auto &data_var_names = node->Op()->Output("Out");
           InsertDataBalanceOp(&result, data_var_names);
         } else {
-          CreateComputationalOps(&result, node.get(), places_.size());
+          CreateComputationalOps(&result, node, places_.size());
         }

         if (!is_forwarding && places_.size() > 1) {
@@ -479,8 +478,8 @@ int MultiDevSSAGraphBuilder::GetOpDeviceID(ir::Node *node) const {

   PADDLE_ENFORCE_EQ(param_grad.size(), 2U);
   int dev_id = GetVarDeviceID(param_grad[1]);
-  PADDLE_ENFORCE_NE(dev_id, -1, "dev_id should not be -1.[%s, %s]",
-                    node->Op()->Type(), param_grad[0]);
+  PADDLE_ENFORCE_NE(dev_id, -1, "dev_id should not be -1.[%s, %s, %s]",
+                    node->Op()->Type(), param_grad[0], param_grad[1]);
   return dev_id;
 }

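The key change in this file is the iteration order: instead of walking the raw node list and skipping non-operation nodes, the builder now consumes the list returned by TopologySortOperationFromInToOut, so each operation is visited only after the operations that produce its inputs, and the loop variable becomes a plain ir::Node * (which is why the .get() calls disappear). A minimal sketch of how a pass can rely on that ordering; ApplyPassInTopologicalOrder and ProcessOp are illustrative names, not part of this commit:

    #include <memory>
    #include <vector>

    #include "paddle/fluid/framework/ir/graph.h"  // declares TopologySortOperationFromInToOut

    namespace paddle {
    namespace framework {

    void ProcessOp(ir::Node *op);  // hypothetical per-op handler, not in this commit

    void ApplyPassInTopologicalOrder(
        const std::vector<std::unique_ptr<ir::Node>> &nodes) {
      // The helper returns raw ir::Node pointers in producer-before-consumer
      // order, so by the time an op is visited, everything feeding it has
      // already been processed.
      for (ir::Node *op : TopologySortOperationFromInToOut(nodes)) {
        ProcessOp(op);
      }
    }

    }  // namespace framework
    }  // namespace paddle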
paddle/fluid/framework/details/ssa_graph_builder.cc

Lines changed: 11 additions & 0 deletions
@@ -37,6 +37,17 @@ void SSAGraphBuilder::PolishGraphToSupportDataHazards(Graph *graph) {
           continue;
         }

+        bool has_dep = false;
+        for (auto read_out : read_op->Outputs()) {
+          for (auto write_in : write_op->Inputs()) {
+            if (read_out == write_in) {
+              has_dep = true;
+              break;
+            }
+          }
+        }
+        if (has_dep) continue;
+
         auto *dep_var = new DummyVarHandle(
             graph->CreateEmptyNode("dummy", ir::Node::Type::kVariable));
         read_op->AddOutput(dep_var);

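The added block avoids redundant control edges: before PolishGraphToSupportDataHazards inserts a dummy dependency variable between a read_op and a later write_op, it first checks whether one of read_op's outputs is already an input of write_op, in which case the existing data edge already orders the two ops. A standalone sketch of that check, assuming the handles are compared by name (the real code compares the handle pointers returned by Outputs()/Inputs()):

    #include <string>
    #include <vector>

    // Returns true if write_op already consumes something read_op produces,
    // i.e. a data dependency already serializes the two ops and no extra
    // dummy variable is needed.
    bool HasExistingDependency(const std::vector<std::string> &read_op_outputs,
                               const std::vector<std::string> &write_op_inputs) {
      for (const auto &out : read_op_outputs) {
        for (const auto &in : write_op_inputs) {
          if (out == in) return true;
        }
      }
      return false;
    }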
paddle/fluid/framework/ir/graph.cc

Lines changed: 121 additions & 12 deletions
@@ -12,55 +12,164 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

+#include <algorithm>
+#include <unordered_set>
+
 #include "paddle/fluid/framework/ir/graph.h"
+#include "paddle/fluid/framework/op_proto_maker.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/var_desc.h"

 namespace paddle {
 namespace framework {
+namespace {
+void SortHelper(
+    const std::map<ir::Node *, std::unordered_set<ir::Node *>> &adj_list,
+    ir::Node *node, std::unordered_set<ir::Node *> *visited,
+    std::vector<ir::Node *> *ret) {
+  visited->insert(node);
+
+  for (auto adj : adj_list.at(node)) {
+    if (visited->find(adj) == visited->end()) {
+      SortHelper(adj_list, adj, visited, ret);
+    }
+  }
+
+  VLOG(3) << "topology sort insert: " << node->Name()
+          << reinterpret_cast<void *>(node) << " input " << node->inputs.size();
+  ret->push_back(node);
+}
+}  // namespace

-// NOTE(paddle-dev): This graph contains circle.
 Graph::Graph(const ProgramDesc &program) : program_(program) {
   VLOG(3) << "block in program:" << program_.Size();
   std::unordered_map<std::string, VarDesc *> all_vars;
   for (auto *var : program.Block(0).AllVars()) {
     all_vars.emplace(var->Name(), var);
   }

-  std::map<std::string, ir::Node *> var_nodes;
+  ir::Node *last_backward = nullptr;
+  std::vector<ir::Node *> optimize_ops;
+  std::map<std::string, std::vector<ir::Node *>> var_nodes;
   for (auto *op : program.Block(0).AllOps()) {
     ir::Node *node = CreateOpNode(op);
+    if (boost::get<int>(
+            op->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) ==
+        static_cast<int>(OpRole::kBackward)) {
+      last_backward = node;
+    } else if (boost::get<int>(
+                   op->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) ==
+               static_cast<int>(OpRole::kOptimize)) {
+      optimize_ops.push_back(node);
+    }

     for (auto &each_var_name : op->InputArgumentNames()) {
       ir::Node *var = nullptr;
       if (var_nodes.find(each_var_name) != var_nodes.end()) {
-        var = var_nodes.at(each_var_name);
+        var = var_nodes.at(each_var_name).back();
       } else if (all_vars.count(each_var_name) != 0) {
         var = CreateVarNode(all_vars.at(each_var_name));
-        var_nodes[each_var_name] = var;
+        var_nodes[each_var_name].push_back(var);
       } else {
         // TODO(paddle-dev): Seems some assumption doesn't hold?
         VLOG(3) << op->Type()
                 << " input var not in all_var list: " << each_var_name;
         var = CreateEmptyNode(each_var_name, ir::Node::Type::kVariable);
-        var_nodes[each_var_name] = var;
+        var_nodes[each_var_name].push_back(var);
       }
       node->inputs.push_back(var);
       var->outputs.push_back(node);
     }

     for (auto &each_var_name : op->OutputArgumentNames()) {
-      ir::Node *var = nullptr;
-      if (var_nodes.find(each_var_name) != var_nodes.end()) {
-        var = var_nodes.at(each_var_name);
-      } else {
-        var = CreateVarNode(all_vars.at(each_var_name));
-        var_nodes[each_var_name] = var;
-      }
+      ir::Node *var = CreateVarNode(all_vars.at(each_var_name));
+      var_nodes[each_var_name].push_back(var);
       node->outputs.push_back(var);
       var->inputs.push_back(node);
     }
   }
+  for (auto &var : var_nodes) {
+    auto &versions = var.second;
+    if (versions.size() <= 1) continue;
+
+    auto it_new = versions.rbegin();
+    auto it_old = versions.rbegin();
+    ++it_old;
+    for (; it_old != versions.rend(); it_new = it_old, ++it_old) {
+      ir::Node *write_op =
+          (*it_new)->inputs.empty() ? nullptr : (*it_new)->inputs[0];
+      const auto &read_ops = (*it_old)->outputs;
+
+      for (auto *read_op : read_ops) {
+        // Manually add a dependency var from read_op to write_op;
+        if (read_op == write_op) {
+          // Read Write is the same op.
+          continue;
+        }
+        ir::Node *dep_var = CreateEmptyNode("dummy", ir::Node::Type::kVariable);
+        read_op->outputs.push_back(dep_var);
+        dep_var->inputs.push_back(read_op);
+        write_op->inputs.push_back(dep_var);
+        dep_var->outputs.push_back(write_op);
+      }
+    }
+  }
+
+  if (last_backward) {
+    for (ir::Node *opt_node : optimize_ops) {
+      ir::Node *dep_var = CreateEmptyNode("dummy", ir::Node::Type::kVariable);
+      last_backward->outputs.push_back(dep_var);
+      dep_var->inputs.push_back(last_backward);
+      opt_node->inputs.push_back(dep_var);
+      dep_var->outputs.push_back(opt_node);
+      VLOG(3) << "appending connect: " << last_backward->Name()
+              << reinterpret_cast<void *>(last_backward) << "->"
+              << opt_node->Name() << reinterpret_cast<void *>(opt_node);
+    }
+  }
+}
+
+std::vector<ir::Node *> TopologySortOperationFromInToOut(
+    const std::vector<std::unique_ptr<ir::Node>> &nodes) {
+  std::map<ir::Node *, std::unordered_set<ir::Node *>> adj_list;
+  std::unordered_set<ir::Node *> visited;
+  std::vector<ir::Node *> ret;
+
+  for (auto &n : nodes) {
+    if (n->NodeType() != ir::Node::Type::kOperation) continue;
+    if (adj_list.find(n.get()) == adj_list.end()) {
+      adj_list[n.get()] = std::unordered_set<ir::Node *>();
+    }
+    for (auto &var : n->inputs) {
+      for (auto &adj_n : var->inputs) {
+        PADDLE_ENFORCE(adj_n->NodeType() == ir::Node::Type::kOperation);
+        adj_list[n.get()].insert(adj_n);
+        LOG(ERROR) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
+                   << " -> " << n->Name() << reinterpret_cast<void *>(n.get())
+                   << " via " << var->Name() << reinterpret_cast<void *>(var);
+      }
+    }
+  }
+
+  for (auto adj : adj_list) {
+    if (visited.find(adj.first) == visited.end()) {
+      SortHelper(adj_list, adj.first, &visited, &ret);
+    }
+  }
+
+  for (ir::Node *n : ret) {
+    std::unordered_set<ir::Node *> dummy;
+    n->inputs.erase(
+        std::remove_if(n->inputs.begin(), n->inputs.end(),
+                       [n](ir::Node *t) { return t->Name() == "dummy"; }),
+        n->inputs.end());
+    n->outputs.erase(
+        std::remove_if(n->outputs.begin(), n->outputs.end(),
+                       [n](ir::Node *t) { return t->Name() == "dummy"; }),
+        n->outputs.end());
+  }
+  return ret;
 }
+
 }  // namespace framework
 }  // namespace paddle

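Two ideas carry this file. First, the Graph constructor now keeps every version of a variable (var_nodes maps a name to a vector of nodes, one per write), wires write-after-read hazards with "dummy" control-only variables, and forces every kOptimize op to run after the last kBackward op. Second, TopologySortOperationFromInToOut builds a predecessor adjacency list over operation nodes and runs a depth-first, post-order insertion, so producers land in the result before their consumers; SortHelper is the recursive core. A minimal, self-contained sketch of that DFS, with node ids reduced to int; it assumes every node appears as a key in preds and that the graph is acyclic:

    #include <map>
    #include <set>
    #include <vector>

    // Post-order DFS over a "predecessors" adjacency list: a node is appended
    // only after all of its predecessors, so `order` lists producers before
    // consumers.
    void Visit(const std::map<int, std::set<int>> &preds, int node,
               std::set<int> *visited, std::vector<int> *order) {
      visited->insert(node);
      for (int p : preds.at(node)) {
        if (visited->count(p) == 0) Visit(preds, p, visited, order);
      }
      order->push_back(node);
    }

    std::vector<int> TopologicalOrder(const std::map<int, std::set<int>> &preds) {
      std::set<int> visited;
      std::vector<int> order;
      for (const auto &kv : preds) {
        if (visited.count(kv.first) == 0) Visit(preds, kv.first, &visited, &order);
      }
      return order;
    }

The control-only variables inserted for hazard and backward-to-optimize ordering are all named "dummy", which is why the real sort strips any node named "dummy" from each op's inputs and outputs before returning; the sketch above leaves out that clean-up step.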
paddle/fluid/framework/ir/graph.h

Lines changed: 3 additions & 0 deletions
@@ -78,5 +78,8 @@ class Graph {
   std::map<std::string, std::function<void(void)>> attr_dels_;
 };

+std::vector<ir::Node*> TopologySortOperationFromInToOut(
+    const std::vector<std::unique_ptr<ir::Node>>& nodes);
+
 }  // namespace framework
 }  // namespace paddle

paddle/fluid/framework/ir/graph_test.cc

Lines changed: 1 addition & 0 deletions
@@ -76,6 +76,7 @@ TEST(GraphTest, Basic) {
   op->SetType("sum");
   op->SetInput("X", {"test_a", "test_b", "test_c"});
   op->SetOutput("Out", {"test_out"});
+  op->SetAttr("op_role", 1);

   prog.MutableBlock(0)->Var("test_a")->SetType(proto::VarType::SELECTED_ROWS);
   prog.MutableBlock(0)->Var("test_b")->SetType(proto::VarType::SELECTED_ROWS);

paddle/fluid/framework/ir/node.h

Lines changed: 1 addition & 0 deletions
@@ -50,6 +50,7 @@ class Node {
     PADDLE_ENFORCE(type_ == Type::kVariable);
     return var_desc_;
   }
+
   OpDesc* Op() {
     PADDLE_ENFORCE(type_ == Type::kOperation);
     return op_desc_;
