Skip to content

Commit d4e8d70

Browse files
committed
Merge remote-tracking branch 'origin/develop' into fix/sequence_pad
test=develop
2 parents 7f3c6ea + daed473 commit d4e8d70

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+1836
-120
lines changed

paddle/fluid/API.spec

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ paddle.fluid.layers.conv3d ArgSpec(args=['input', 'num_filters', 'filter_size',
6767
paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,))
6868
paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None))
6969
paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None))
70-
paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None))
71-
paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None))
70+
paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
71+
paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
7272
paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False))
7373
paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,))
7474
paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
@@ -103,7 +103,7 @@ paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 's
103103
paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
104104
paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
105105
paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None))
106-
paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100))
106+
paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode'], varargs=None, keywords=None, defaults=(False, -100, False))
107107
paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None))
108108
paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None)
109109
paddle.fluid.layers.autoincreased_step_counter ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1))
@@ -178,6 +178,7 @@ paddle.fluid.layers.affine_grid ArgSpec(args=['theta', 'out_shape', 'name'], var
178178
paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
179179
paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None))
180180
paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None))
181+
paddle.fluid.layers.grid_sampler ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,))
181182
paddle.fluid.layers.log_loss ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None))
182183
paddle.fluid.layers.add_position_encoding ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,))
183184
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))

paddle/fluid/framework/details/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,15 @@ if(WITH_GPU)
3535
all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle graph graph_helper pass)
3636
endif()
3737

38+
cc_library(sequential_execution_pass SRCS sequential_execution_pass.cc DEPS graph graph_helper pass)
39+
3840
cc_library(multi_devices_graph_pass SRCS multi_devices_graph_pass.cc DEPS multi_devices_helper computation_op_handle
3941
scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle fused_broadcast_op_handle)
4042

4143
if(WITH_GPU)
42-
cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS graph framework_proto reference_count_pass)
44+
cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS graph framework_proto reference_count_pass sequential_execution_pass)
4345
else()
44-
cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS graph framework_proto)
46+
cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS graph framework_proto sequential_execution_pass)
4547
endif()
4648

4749
cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope

paddle/fluid/framework/details/build_strategy.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ limitations under the License. */
1616

1717
#include "paddle/fluid/framework/details/multi_devices_graph_check_pass.h"
1818
#include "paddle/fluid/framework/details/multi_devices_graph_print_pass.h"
19+
#include "paddle/fluid/framework/details/sequential_execution_pass.h"
1920
#include "paddle/fluid/framework/ir/graph.h"
2021
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
2122

@@ -27,6 +28,10 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
2728
public:
2829
explicit ParallelExecutorPassBuilder(const BuildStrategy &strategy)
2930
: ir::PassBuilder(), strategy_(strategy) {
31+
if (strategy_.enable_sequential_execution_) {
32+
AppendPass("sequential_execution_pass");
33+
}
34+
3035
// Add a graph viz pass to record a graph.
3136
if (!strategy_.debug_graphviz_path_.empty()) {
3237
auto viz_pass = AppendPass("graph_viz_pass");
@@ -110,6 +115,11 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
110115
pass->Erase("nccl_ctxs");
111116
pass->SetNotOwned<platform::NCCLContextMap>("nccl_ctxs", nctx);
112117
#endif
118+
} else if (pass->Type() == "sequential_execution_pass") {
119+
pass->Erase(kAllOpDescs);
120+
pass->Set<const std::vector<OpDesc *>>(
121+
kAllOpDescs,
122+
new std::vector<OpDesc *>(main_program.Block(0).AllOps()));
113123
}
114124
graph = pass->Apply(std::move(graph));
115125
}
@@ -125,3 +135,4 @@ USE_PASS(multi_batch_merge_pass);
125135
USE_PASS(multi_devices_pass);
126136
USE_PASS(multi_devices_check_pass);
127137
USE_PASS(multi_devices_print_pass);
138+
USE_PASS(sequential_execution_pass);

paddle/fluid/framework/details/build_strategy.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ struct BuildStrategy {
6969

7070
bool enable_data_balance_{false};
7171

72+
bool enable_sequential_execution_{false};
73+
7274
bool fuse_broadcast_op_{false};
7375

7476
// User normally doesn't need to call this API.
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/fluid/framework/details/sequential_execution_pass.h"
16+
#include <string>
17+
#include <unordered_map>
18+
#include <unordered_set>
19+
#include <vector>
20+
#include "paddle/fluid/framework/op_proto_maker.h"
21+
22+
namespace paddle {
23+
namespace framework {
24+
namespace details {
25+
26+
// Returns true when the two op descriptions have equal Type(), Inputs() and
// Outputs(). Nothing else (e.g. attributes) is compared, so two ops that
// differ only in other properties are reported as the same op.
// Both pointers are dereferenced without a null check.
static bool IsSameOpDesc(OpDesc *op1, OpDesc *op2) {
27+
return op1->Type() == op2->Type() && op1->Inputs() == op2->Inputs() &&
28+
op1->Outputs() == op2->Outputs();
29+
}
30+
31+
// Chains the op nodes of `graph` with control-dependency variables so that
// they follow the order of the OpDesc list stored in the kAllOpDescs pass
// attribute. Ops whose type is listed in skip_dist_ops are still matched and
// consumed, but are left out of the chain. Returns the same graph object.
// Enforces (via PADDLE_ENFORCE*) that every OpDesc in the list matches exactly
// one currently-ready op node in the graph.
std::unique_ptr<ir::Graph> SequentialExecutionPass::ApplyImpl(
32+
std::unique_ptr<ir::Graph> graph) const {
33+
// FIXME(zjl): Inserting dependencies between some distributed ops may cause
34+
// multi_devices_graph_pass to fail, so these ops are skipped here.
35+
// Arguably dependencies should not be inserted between such ops at all,
36+
// because doing so can easily cause a deadlock.
37+
// Add more distributed ops to this skip list when they are found to cause
38+
// errors in multi_devices_graph_pass.
39+
static std::unordered_set<std::string> skip_dist_ops{
40+
"send", "recv", "send_barrier", "fetch_barrier"};
41+
42+
// Desired execution order, supplied by the caller as a pass attribute.
auto &ops = Get<const std::vector<OpDesc *>>(kAllOpDescs);
43+
// Op nodes in the order they will be chained (skip_dist_ops excluded).
std::vector<ir::Node *> op_node_list;
44+
op_node_list.reserve(ops.size());
45+
46+
// op_deps: for each op node, the number of distinct producer ops it still
// waits on.
std::unordered_map<ir::Node *, size_t> op_deps;
47+
// pending_ops: for each op node, the op nodes that consume one of its
// output variables.
std::unordered_map<ir::Node *, std::unordered_set<ir::Node *>> pending_ops;
48+
// ready_ops: op nodes whose outstanding dependency count is zero and that
// have not been consumed by the ordering loop yet.
std::unordered_set<ir::Node *> ready_ops;
49+
50+
// Step 1: build the op-to-op dependency bookkeeping from the graph.
for (ir::Node *node : graph->Nodes()) {
51+
if (!node->IsOp()) continue;
52+
std::unordered_set<ir::Node *> preceding_ops;
53+
for (auto *in : node->inputs) {
54+
PADDLE_ENFORCE(in->IsVar(),
55+
"Preceding Node of Op Nodes must be Var Node");
56+
// A variable with no inputs has no producer op, so it adds no dependency.
if (in->inputs.empty()) continue;
57+
PADDLE_ENFORCE(in->inputs.size() == 1 && in->inputs[0]->IsOp(),
58+
"Preceding Op Node of Var Node must be unique");
59+
preceding_ops.insert(in->inputs[0]);
60+
pending_ops[in->inputs[0]].insert(node);
61+
}
62+
op_deps[node] = preceding_ops.size();
63+
if (preceding_ops.empty()) {
64+
ready_ops.insert(node);
65+
}
66+
}
67+
68+
// Step 2: walk the OpDescs in the requested order, matching each one to a
// node in ready_ops and releasing the ops that depended on it.
for (auto *op_desc : ops) {
69+
ir::Node *found_node = nullptr;
70+
// Scan every ready op (no early exit) so that an ambiguous match is
// detected and reported instead of silently picking one.
for (auto *node : ready_ops) {
71+
if (IsSameOpDesc(op_desc, node->Op())) {
72+
PADDLE_ENFORCE(found_node == nullptr,
73+
"Found multiple op_desc in graph: %s", op_desc->Type());
74+
found_node = node;
75+
}
76+
}
77+
78+
PADDLE_ENFORCE_NOT_NULL(found_node, "Cannot find op_desc in graph: %s",
79+
op_desc->Type());
80+
// NOTE(review): operator[] default-inserts an empty set when found_node has
// no consumers; the loop body then simply does not run.
for (auto *pending_op : pending_ops[found_node]) {
81+
if (--op_deps.at(pending_op) == 0) {
82+
ready_ops.insert(pending_op);
83+
}
84+
}
85+
ready_ops.erase(found_node);
86+
// Skipped distributed ops are consumed above but not added to the chain.
if (skip_dist_ops.count(op_desc->Type()) == 0) {
87+
op_node_list.push_back(found_node);
88+
}
89+
}
90+
91+
// Step 3: link each op to its predecessor in op_node_list through a fresh
// control-dependency variable (predecessor -> dep_var -> op).
for (size_t i = 1; i < op_node_list.size(); ++i) {
92+
auto *dep_var = graph->CreateControlDepVar();
93+
op_node_list[i]->inputs.push_back(dep_var);
94+
op_node_list[i - 1]->outputs.push_back(dep_var);
95+
dep_var->outputs.push_back(op_node_list[i]);
96+
dep_var->inputs.push_back(op_node_list[i - 1]);
97+
VLOG(10) << "Add dependencies between " << op_node_list[i - 1]->Name()
98+
<< " and " << op_node_list[i]->Name();
99+
}
100+
return graph;
101+
}
102+
103+
} // namespace details
104+
} // namespace framework
105+
} // namespace paddle
106+
107+
// Registers SequentialExecutionPass under the name "sequential_execution_pass"
// and declares kAllOpDescs as a required pass attribute; ApplyImpl reads that
// attribute to obtain the op order.
REGISTER_PASS(sequential_execution_pass,
108+
paddle::framework::details::SequentialExecutionPass)
109+
.RequirePassAttr(paddle::framework::details::kAllOpDescs);
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include "paddle/fluid/framework/ir/graph.h"
18+
#include "paddle/fluid/framework/ir/pass.h"
19+
20+
namespace paddle {
21+
namespace framework {
22+
namespace details {
23+
24+
// Name of the pass attribute that carries the list of OpDescs
// (std::vector<OpDesc *>) consumed by SequentialExecutionPass.
constexpr char kAllOpDescs[] = "all_op_descs";
25+
26+
// Graph pass derived from ir::Pass. It declares no state or public members of
// its own; its only member is the ApplyImpl override, which is defined out of
// line.
class SequentialExecutionPass : public ir::Pass {
27+
protected:
28+
// Takes ownership of `graph` and returns a graph (ownership passes back to
// the caller through the returned unique_ptr).
std::unique_ptr<ir::Graph> ApplyImpl(
29+
std::unique_ptr<ir::Graph> graph) const override;
30+
};
31+
32+
} // namespace details
33+
} // namespace framework
34+
} // namespace paddle

paddle/fluid/framework/ir/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ pass_library(conv_bn_fuse_pass inference)
4141
pass_library(seqconv_eltadd_relu_fuse_pass inference)
4242
if(WITH_MKLDNN)
4343
pass_library(mkldnn_placement_pass base)
44+
pass_library(depthwise_conv_mkldnn_pass base)
4445
pass_library(conv_bias_mkldnn_fuse_pass inference)
4546
pass_library(conv_relu_mkldnn_fuse_pass inference)
4647
pass_library(conv_elementwise_add_mkldnn_fuse_pass inference)
@@ -59,6 +60,7 @@ cc_test(graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph
5960
cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector)
6061
cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto)
6162
if (WITH_MKLDNN)
63+
cc_test(test_depthwise_conv_mkldnn_pass SRCS depthwise_conv_mkldnn_pass_tester.cc DEPS depthwise_conv_mkldnn_pass)
6264
cc_test(test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
6365
cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass)
6466
endif ()

paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ class ConvReLUFusePass : public FusePassBase {
3131
virtual ~ConvReLUFusePass() {}
3232

3333
protected:
34-
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;
34+
std::unique_ptr<ir::Graph> ApplyImpl(
35+
std::unique_ptr<ir::Graph> graph) const override;
3536
};
3637

3738
} // namespace ir

paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.h"
1616

1717
#include <gtest/gtest.h>
18+
#include "paddle/fluid/framework/op_proto_maker.h"
1819

1920
namespace paddle {
2021
namespace framework {
@@ -36,6 +37,8 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
3637
op->SetInput("X", inputs);
3738
}
3839
op->SetOutput("Out", outputs);
40+
op->SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
41+
static_cast<int>(OpRole::kForward));
3942
}
4043

4144
// a->OP0->b
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.h"
16+
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
17+
18+
namespace paddle {
19+
namespace framework {
20+
namespace ir {
21+
22+
// GET_NODE(id, pattern): declares a local `auto* id` bound to the graph node
// that `subgraph` maps to the pattern node named #id (looked up with
// pattern.RetrieveNode). Requires a variable called `subgraph` in the
// enclosing scope. Enforces that the pattern node is present in `subgraph`
// and that the mapped node pointer is not null.
// The expansion is several statements and already ends with ';', so it must
// be used at statement level inside a braced block.
#define GET_NODE(id, pattern) \
23+
PADDLE_ENFORCE(subgraph.count(pattern.RetrieveNode(#id)), \
24+
"pattern has no Node called %s", #id); \
25+
auto* id = subgraph.at(pattern.RetrieveNode(#id)); \
26+
PADDLE_ENFORCE_NOT_NULL(id, "subgraph has no node %s", #id);
27+
28+
// Rewrites every matched "depthwise_conv2d" op whose "use_mkldnn" attribute is
// true so that its op type becomes "conv2d". The number of rewritten ops is
// passed to AddStatis. Returns the same graph object it was given.
// Enforces that `graph` is non-null.
std::unique_ptr<ir::Graph> DepthwiseConvMKLDNNPass::ApplyImpl(
29+
std::unique_ptr<ir::Graph> graph) const {
30+
PADDLE_ENFORCE(graph.get());
31+
FusePassBase::Init("depthwise_conv_mkldnn_pass", graph.get());
32+
GraphPatternDetector gpd;
33+
34+
// Single-node pattern: an op of type depthwise_conv2d with use_mkldnn == true.
auto* pattern = gpd.mutable_pattern();
35+
pattern->NewNode("depthwise_conv")
36+
->assert_is_op("depthwise_conv2d")
37+
->assert_op_attr("use_mkldnn", true);
38+
39+
int found_depthwise_conv_mkldnn_count = 0;
40+
// Invoked by the detector for each match; captures `pattern` and the counter
// by reference. The `g` parameter is not used in the body.
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
41+
Graph* g) {
42+
VLOG(3) << "handle DepthwiseConvMKLDNN fuse";
43+
// Declares `depthwise_conv`, the graph node matched for the pattern node.
GET_NODE(depthwise_conv, (*pattern));
44+
// Only the op type is changed; the op's inputs, outputs and attributes are
// left untouched here.
depthwise_conv->Op()->SetType("conv2d");
45+
found_depthwise_conv_mkldnn_count++;
46+
};
47+
48+
gpd(graph.get(), handler);
49+
AddStatis(found_depthwise_conv_mkldnn_count);
50+
return graph;
51+
}
52+
53+
} // namespace ir
54+
} // namespace framework
55+
} // namespace paddle
56+
57+
// Registers DepthwiseConvMKLDNNPass under the name
// "depthwise_conv_mkldnn_pass".
REGISTER_PASS(depthwise_conv_mkldnn_pass,
58+
paddle::framework::ir::DepthwiseConvMKLDNNPass);

0 commit comments

Comments
 (0)