Skip to content

Commit a94a735

Browse files
author
chengduo
authored
Refine the GraphNum check (#14144)
* refine GraphCheck test=develop
* fix ci fail test=develop
1 parent 48be9dc commit a94a735

File tree

3 files changed

+34
-10
lines changed

3 files changed

+34
-10
lines changed

paddle/fluid/framework/ir/graph_helper.cc

Lines changed: 21 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,15 @@ limitations under the License. */
1515
#include "paddle/fluid/framework/ir/graph_helper.h"
1616
#include <algorithm>
1717
#include <deque>
18+
#include <fstream>
19+
#include <iosfwd>
20+
#include <ostream>
1821
#include <unordered_set>
1922

23+
DEFINE_string(print_sub_graph_dir, "",
24+
"FLAGS_print_sub_graph_dir is used "
25+
"to print the nodes of sub_graphs.");
26+
2027
namespace paddle {
2128
namespace framework {
2229
namespace ir {
@@ -164,12 +171,15 @@ size_t GraphNum(const Graph &graph) {
164171
graph_nodes.emplace_back(g_nodes);
165172
}
166173

167-
if (VLOG_IS_ON(100)) {
168-
VLOG(100) << "graph_num: " << graph_nodes.size();
169-
for (auto &g_n : graph_nodes) {
170-
VLOG(100) << "graph_nodes: " << g_n.size();
171-
if (g_n.size() < 10) {
172-
std::stringstream out;
174+
if (FLAGS_print_sub_graph_dir.size()) {
175+
if (graph_nodes.size() > 1) {
176+
std::stringstream out;
177+
for (auto &g_n : graph_nodes) {
178+
out << "graph_nodes: " << g_n.size() << "\n";
179+
}
180+
out << "\n\n";
181+
for (auto &g_n : graph_nodes) {
182+
out << "graph_nodes: " << g_n.size();
173183
for (auto &node : g_n) {
174184
out << "\nNode: " << node->Name() << " in [";
175185
for (auto &n : node->inputs) {
@@ -181,8 +191,12 @@ size_t GraphNum(const Graph &graph) {
181191
}
182192
out << "]";
183193
}
184-
VLOG(100) << out.str();
194+
out << "\n\n\n";
185195
}
196+
std::unique_ptr<std::ostream> fout(
197+
new std::ofstream(FLAGS_print_sub_graph_dir));
198+
PADDLE_ENFORCE(fout->good());
199+
*fout << out.str();
186200
}
187201
}
188202

paddle/fluid/framework/parallel_executor.cc

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -171,8 +171,17 @@ ParallelExecutor::ParallelExecutor(
171171
}
172172
// If the loss_var_name is given, the number of graph should be only one.
173173
if (loss_var_name.size()) {
174-
PADDLE_ENFORCE_EQ(ir::GraphNum(*graph), 1,
175-
"The number of graph should be only one");
174+
size_t graph_num = ir::GraphNum(*graph);
175+
if (graph_num > 1) {
176+
LOG(WARNING)
177+
<< "The number of graph should be only one, "
178+
"but the current graph has "
179+
<< ir::GraphNum(*graph)
180+
<< " sub_graphs. If you want to see the nodes of the "
181+
"sub_graphs, you should use 'FLAGS_print_sub_graph_dir' "
182+
"to specify the output dir. NOTES: if you not do training, "
183+
"please don't pass loss_var_name.";
184+
}
176185
}
177186

178187
if (exec_strategy.type_ == ExecutionStrategy::kDefault) {

python/paddle/fluid/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,8 @@ def __bootstrap__():
116116
'use_mkldnn', 'use_ngraph', 'initial_cpu_memory_in_mb',
117117
'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads',
118118
"dist_threadpool_size", 'cpu_deterministic', 'eager_delete_tensor_gb',
119-
'allocator_strategy', 'reader_queue_speed_test_mode'
119+
'allocator_strategy', 'reader_queue_speed_test_mode',
120+
'print_sub_graph_dir'
120121
]
121122
if os.name != 'nt':
122123
read_env_flags.append('warpctc_dir')

0 commit comments

Comments
 (0)