Skip to content

Commit fbc1640

Browse files
committed
Merge remote-tracking branch 'ups/develop' into refine/op/fc
2 parents 0098a49 + d96ee24 commit fbc1640

File tree

3 files changed

+139
-6
lines changed

3 files changed

+139
-6
lines changed

paddle/fluid/framework/ir/graph.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,38 @@ namespace paddle {
2828
namespace framework {
2929
namespace ir {
3030

31+
/*
32+
* The graph is a Directed Acyclic Single Static Assignment Graph.
33+
*
34+
* In more detail, the following properties must hold:
35+
*
36+
* The graph shouldn't contain cycles. Each node is a black box to the graph
37+
* so the node itself could be a loop operator.
38+
*
39+
* Each Variable-type node has only one input (thus single static assignment).
40+
*
41+
* The output/input of operator is variable and the output/input of variable
42+
* is operator.
43+
*
44+
* The following data hazards in Program are addressed in the Graph:
45+
*
46+
* Write-After-Read
47+
* a = op1(x)
48+
* x = op2(b)
49+
* A control-dependency connection is created between op1 and op2 such that
50+
* op1->op2, so as to ensure correct order.
51+
*
52+
* Write-After-Write
53+
* x = op1(a)
54+
* x = op2(b)
55+
* A control-dependency connection is created between op1 and op2 such that
56+
* op1->op2, so as to ensure correct order.
57+
*
58+
* Other properties currently hold, but are not enforced yet:
59+
*
60+
* Variable-type nodes (not control dep) with the same variable name share
61+
* the same underlying VarDesc.
62+
*/
3163
class Graph {
3264
public:
3365
explicit Graph(const ProgramDesc &program);

paddle/fluid/framework/ir/graph_test.cc

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class SumOpMaker : public OpProtoAndCheckerMaker {
3636
public:
3737
void Make() {
3838
AddInput("X", "").AsDuplicable();
39-
AddOutput("Out", "");
39+
AddOutput("Out", "").AsDuplicable();
4040
AddComment("");
4141
}
4242
};
@@ -59,11 +59,27 @@ class SumOpVarTypeInference : public VarTypeInference {
5959
block->Var(out_var_name)->SetType(default_var_type);
6060
}
6161
};
62+
63+
class DummyOpMaker : public OpProtoAndCheckerMaker {
64+
public:
65+
void Make() {
66+
AddInput("X", "").AsDuplicable();
67+
AddOutput("Out", "").AsDuplicable();
68+
AddComment("");
69+
}
70+
};
71+
72+
class DummyOpVarTypeInference : public VarTypeInference {
73+
public:
74+
void operator()(const OpDesc &op_desc, BlockDesc *block) const override {}
75+
};
6276
} // namespace framework
6377
} // namespace paddle
6478

6579
REGISTER_OPERATOR(sum, paddle::framework::NOP, paddle::framework::SumOpMaker,
6680
paddle::framework::SumOpVarTypeInference);
81+
// Register the test-only "dummy" operator with its own proto maker and its
// own (no-op) variable-type inference instead of reusing the classes that
// belong to the "sum" operator. DummyOpMaker declares the same "X"/"Out"
// slots as SumOpMaker, so the operator's interface is unchanged.
REGISTER_OPERATOR(dummy, paddle::framework::NOP,
                  paddle::framework::DummyOpMaker,
                  paddle::framework::DummyOpVarTypeInference);
6783
REGISTER_OPERATOR(sum_without_infer_var_type, paddle::framework::NOP,
6884
paddle::framework::SumOpMaker);
6985

@@ -110,5 +126,83 @@ TEST(GraphTest, Basic) {
110126
}
111127
ASSERT_EQ(nodes.size(), 5);
112128
}
129+
130+
// Write-After-Read: "sum" reads variable "a" and a later "dummy" op writes
// "a". The test expects the graph to give "sum" an extra control-dependency
// output and "dummy" an extra control-dependency input, and expects both to
// be the very same node (i.e. sum -> dummy ordering edge).
TEST(GraphTest, WriteAfterRead) {
  ProgramDesc prog;

  // op1: b = sum(a)
  auto *op = prog.MutableBlock(0)->AppendOp();
  op->SetType("sum");
  op->SetInput("X", {"a"});
  op->SetOutput("Out", {"b"});
  op->SetAttr("op_role", 1);

  // op2: a = dummy(c)  -- overwrites the variable op1 has read.
  op = prog.MutableBlock(0)->AppendOp();
  op->SetType("dummy");
  op->SetInput("X", {"c"});
  op->SetOutput("Out", {"a"});
  op->SetAttr("op_role", 1);

  prog.MutableBlock(0)->Var("a")->SetType(proto::VarType::LOD_TENSOR);
  prog.MutableBlock(0)->Var("b")->SetType(proto::VarType::LOD_TENSOR);
  prog.MutableBlock(0)->Var("c")->SetType(proto::VarType::LOD_TENSOR);

  std::unique_ptr<ir::Graph> g(new ir::Graph(prog));
  ir::Node *control_dep1 = nullptr;
  ir::Node *control_dep2 = nullptr;
  for (ir::Node *n : g->Nodes()) {
    if (n->Name() == "sum") {
      // Assert the size first so that indexing element 1 is known to be valid.
      ASSERT_EQ(n->outputs.size(), 2);
      ASSERT_EQ(n->outputs[0]->Name(), "b");
      ASSERT_TRUE(ir::IsControlDepVar(*n->outputs[1]));
      control_dep1 = n->outputs[1];
    }
    if (n->Name() == "dummy") {
      ASSERT_EQ(n->inputs.size(), 2);
      ASSERT_EQ(n->inputs[0]->Name(), "c");
      ASSERT_TRUE(ir::IsControlDepVar(*n->inputs[1]));
      control_dep2 = n->inputs[1];
    }
  }

  // Both op nodes must have been visited; otherwise the equality below would
  // pass vacuously with two null pointers.
  ASSERT_TRUE(control_dep1 != nullptr);
  ASSERT_TRUE(control_dep2 != nullptr);
  ASSERT_EQ(control_dep1, control_dep2);
}
168+
169+
// Write-After-Write: "sum" writes variable "b" and a later "dummy" op writes
// "b" again. The test expects the graph to give "sum" an extra
// control-dependency output and "dummy" an extra control-dependency input,
// and expects both to be the very same node (i.e. sum -> dummy ordering edge).
TEST(GraphTest, WriteAfterWrite) {
  ProgramDesc prog;

  // op1: b = sum(a)
  auto *op = prog.MutableBlock(0)->AppendOp();
  op->SetType("sum");
  op->SetInput("X", {"a"});
  op->SetOutput("Out", {"b"});
  op->SetAttr("op_role", 1);

  // op2: b = dummy(c)  -- writes the same variable as op1.
  op = prog.MutableBlock(0)->AppendOp();
  op->SetType("dummy");
  op->SetInput("X", {"c"});
  op->SetOutput("Out", {"b"});
  op->SetAttr("op_role", 1);

  prog.MutableBlock(0)->Var("a")->SetType(proto::VarType::LOD_TENSOR);
  prog.MutableBlock(0)->Var("b")->SetType(proto::VarType::LOD_TENSOR);
  prog.MutableBlock(0)->Var("c")->SetType(proto::VarType::LOD_TENSOR);

  std::unique_ptr<ir::Graph> g(new ir::Graph(prog));
  ir::Node *control_dep1 = nullptr;
  ir::Node *control_dep2 = nullptr;
  for (ir::Node *n : g->Nodes()) {
    if (n->Name() == "sum") {
      // Assert the size first so that indexing element 1 is known to be valid.
      ASSERT_EQ(n->outputs.size(), 2);
      ASSERT_EQ(n->outputs[0]->Name(), "b");
      ASSERT_TRUE(ir::IsControlDepVar(*n->outputs[1]));
      control_dep1 = n->outputs[1];
    }
    if (n->Name() == "dummy") {
      ASSERT_EQ(n->inputs.size(), 2);
      ASSERT_EQ(n->inputs[0]->Name(), "c");
      ASSERT_TRUE(ir::IsControlDepVar(*n->inputs[1]));
      control_dep2 = n->inputs[1];
    }
  }

  // Compare only after the whole node set has been walked: doing it inside
  // the "dummy" branch would depend on "sum" having been visited first.
  ASSERT_TRUE(control_dep1 != nullptr);
  ASSERT_TRUE(control_dep2 != nullptr);
  ASSERT_EQ(control_dep1, control_dep2);
}
113207
} // namespace framework
114208
} // namespace paddle

paddle/fluid/platform/profiler.cc

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -270,12 +270,13 @@ struct EventItem {
270270
double min_time;
271271
double max_time;
272272
double ave_time;
273+
float ratio;
273274
};
274275

275276
// Print results
276277
void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
277278
const std::string& sorted_domain, const size_t name_width,
278-
const size_t data_width) {
279+
const size_t data_width, double total) {
279280
// Output header information
280281
std::cout << "\n------------------------->"
281282
<< " Profiling Report "
@@ -300,7 +301,8 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
300301
std::cout << std::setw(name_width) << "Event" << std::setw(data_width)
301302
<< "Calls" << std::setw(data_width) << "Total"
302303
<< std::setw(data_width) << "Min." << std::setw(data_width)
303-
<< "Max." << std::setw(data_width) << "Ave." << std::endl;
304+
<< "Max." << std::setw(data_width) << "Ave."
305+
<< std::setw(data_width) << "Ratio." << std::endl;
304306
for (size_t i = 0; i < events_table.size(); ++i) {
305307
for (size_t j = 0; j < events_table[i].size(); ++j) {
306308
const EventItem& event_item = events_table[i][j];
@@ -309,7 +311,9 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
309311
<< std::setw(data_width) << event_item.total_time
310312
<< std::setw(data_width) << event_item.min_time
311313
<< std::setw(data_width) << event_item.max_time
312-
<< std::setw(data_width) << event_item.ave_time << std::endl;
314+
<< std::setw(data_width) << event_item.ave_time
315+
<< std::setw(data_width) << event_item.total_time / total
316+
<< std::endl;
313317
}
314318
}
315319
std::cout << std::endl;
@@ -359,6 +363,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
359363

360364
std::vector<std::vector<EventItem>> events_table;
361365
size_t max_name_width = 0;
366+
double total = 0.; // the total time
362367
for (size_t i = 0; i < events.size(); i++) {
363368
std::list<Event> pushed_events;
364369
std::vector<EventItem> event_items;
@@ -379,6 +384,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
379384
g_state == ProfilerState::kAll)
380385
? rit->CudaElapsedMs(events[i][j])
381386
: rit->CpuElapsedMs(events[i][j]);
387+
total += event_time;
382388

383389
std::string event_name =
384390
"thread" + std::to_string(rit->thread_id()) + "::" + rit->name();
@@ -387,7 +393,8 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
387393
if (event_idx.find(event_name) == event_idx.end()) {
388394
event_idx[event_name] = event_items.size();
389395
EventItem event_item = {event_name, 1, event_time,
390-
event_time, event_time, event_time};
396+
event_time, event_time, event_time,
397+
0.};
391398
event_items.push_back(event_item);
392399
} else {
393400
int index = event_idx[event_name];
@@ -431,7 +438,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
431438
}
432439

433440
// Print report
434-
PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12);
441+
PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12, total);
435442
}
436443

437444
void DisableProfiler(EventSortingKey sorted_key,

0 commit comments

Comments
 (0)