
Commit 1083e99

Merge develop

2 parents: e895c98 + f0f0699

34 files changed: +1582 / -130 lines

CMakeLists.txt

Lines changed: 0 additions & 6 deletions
@@ -138,12 +138,6 @@ else()
   set(THIRD_PARTY_BUILD_TYPE Release)
 endif()
 
-if(WITH_MKL)
-  option(MKL_SPLIT_GEMM "PaddlePaddle MKL gemm would split to small ones" OFF)
-  if (MKL_SPLIT_GEMM)
-    add_definitions(-DPADDLE_MKL_SPLIT_GEMM)
-  endif()
-endif()
 set(WITH_MKLML ${WITH_MKL})
 if (NOT DEFINED WITH_MKLDNN)
   if (WITH_MKL AND AVX2_FOUND)

doc/fluid/dev/new_op_cn.md

Lines changed: 5 additions & 5 deletions
@@ -36,19 +36,19 @@
 <tbody>
 <tr>
 <td>OpProtoMake definition</td>
-<td>`.cc` file; a Backward Op does not need to define an OpProtoMake</td>
+<td>.cc file; a Backward Op does not need to define an OpProtoMake</td>
 </tr>
 <tr>
 <td>Op definition</td>
-<td>`.cc` file</td>
+<td>.cc file</td>
 </tr>
 <tr>
 <td>Kernel implementation</td>
-<td>A Kernel shared by CPU and CUDA is implemented in a `.h` file; otherwise the CPU implementation goes in a `.cc` file and the CUDA implementation in a `.cu` file.</td>
+<td>A Kernel shared by CPU and CUDA is implemented in a .h file; otherwise the CPU implementation goes in a .cc file and the CUDA implementation in a .cu file.</td>
 </tr>
 <tr>
 <td>Op registration</td>
-<td>Op registration is implemented in the `.cc` file; CPU Kernel registration goes in the `.cc` file and CUDA Kernel registration in the `.cu` file</td>
+<td>Op registration is implemented in the .cc file; CPU Kernel registration goes in the .cc file and CUDA Kernel registration in the .cu file</td>
 </tr>
 </tbody>
 </table>
@@ -391,7 +391,7 @@ PADDLE_ENFORCE(ctx->HasInput("X"), "");
 ```
 Problem example 2: the error message is too terse
 ```
-PADDLE_ENFORCE(i != nullptr, "I must be set"); // what is I?
+PADDLE_ENFORCE(i != nullptr, "i must be set"); // what is i?
 ```
 
 2. Using developer-defined variable abbreviations in error messages is hard to understand!

paddle/fluid/API.spec

Lines changed: 2 additions & 1 deletion
@@ -163,6 +163,7 @@ paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], v
 paddle.fluid.layers.prelu ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.layers.flatten ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None))
 paddle.fluid.layers.sequence_mask ArgSpec(args=['x', 'maxlen', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 'int64', None))
+paddle.fluid.layers.stack ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,))
 paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
 paddle.fluid.layers.open_recordio_file ArgSpec(args=['filename', 'shapes', 'lod_levels', 'dtypes', 'pass_num', 'for_parallel'], varargs=None, keywords=None, defaults=(1, True))
 paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
@@ -192,7 +193,7 @@ paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None
 paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
 paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
 paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None))
 paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.Switch.case ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None)

paddle/fluid/framework/array.h

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+#include "paddle/fluid/platform/hostdevice.h"
+
+namespace paddle {
+namespace framework {
+template <typename T, size_t N>
+class Array {
+  static_assert(N > 0, "The size of array must be larger than 0");
+
+ public:
+  HOSTDEVICE Array() {}
+
+  HOSTDEVICE explicit Array(const T &val) {
+    for (size_t i = 0; i < N; ++i) data_[i] = val;
+  }
+
+  HOSTDEVICE const T *Get() const { return data_; }
+
+  HOSTDEVICE T *GetMutable() { return data_; }
+
+  HOSTDEVICE T &operator[](size_t index) { return data_[index]; }
+
+  HOSTDEVICE const T &operator[](size_t index) const { return data_[index]; }
+
+  HOSTDEVICE constexpr size_t size() const { return N; }
+
+ private:
+  T data_[N];
+};
+
+}  // namespace framework
+}  // namespace paddle
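
The new header adds a fixed-size array template usable from both host and device code (HOSTDEVICE comes from platform/hostdevice.h and, in CUDA builds, typically expands to __host__ __device__). A minimal host-side usage sketch, not part of the commit:

#include "paddle/fluid/framework/array.h"

int SumDims() {
  paddle::framework::Array<int, 3> dims(1);  // fill constructor: {1, 1, 1}
  dims[0] = 64;                              // mutable element access
  int sum = 0;
  for (size_t i = 0; i < dims.size(); ++i) sum += dims[i];  // size() is constexpr N
  return sum;  // 64 + 1 + 1
}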

paddle/fluid/framework/details/multi_devices_graph_pass.cc

Lines changed: 22 additions & 4 deletions
@@ -763,6 +763,8 @@ void MultiDevSSAGraphBuilder::CreateDistTrainOp(ir::Graph *result,
 // Create RPC related op handles that connects its in ops and out ops.
 void MultiDevSSAGraphBuilder::CreateRPCOp(ir::Graph *result,
                                           ir::Node *node) const {
+  // FIXME(typhoonzero): Cleanup this deps for both sync mode and async mode
+  // put them into transpiler.
   int op_dev_id = -1;
   if (node->Op()->Type() == "send") {
     // TODO(paddle-dev): getting the first var is not safe.
@@ -771,26 +773,42 @@ void MultiDevSSAGraphBuilder::CreateRPCOp(ir::Graph *result,
                    "This hack no longer holds, please fix.");
     // the variable name which contains .block means it was splited by
     // split_byref op
-    // so that we can balance the variable blocks to all the pserver
-    // instances.
     if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce &&
         node->inputs[0]->Name().find(".block") == std::string::npos) {
       std::vector<std::string> input_var_names;
       for (ir::Node *n : node->inputs) {
         input_var_names.push_back(n->Name());
       }
-      op_dev_id = GetAppropriateDeviceID(input_var_names);
+      auto send_param_grad = boost::get<std::vector<std::string>>(
+          node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
+      PADDLE_ENFORCE_EQ(send_param_grad.size(), 2U);
+      op_dev_id = GetAppropriateDeviceID({send_param_grad[1]});
+      VLOG(10) << "send grad " << input_var_names[0] << " origin "
+               << send_param_grad[1] << " place: " << op_dev_id;
       for (auto &varname : input_var_names) {
         result->Get<ShardedVarDevice>(kShardedVarDevice)
             .emplace(varname, op_dev_id);
       }
+      result->Get<ShardedVarDevice>(kShardedVarDevice)
+          .emplace(send_param_grad[1], op_dev_id);
     }
   } else if (node->Op()->Type() == "recv") {
     std::vector<std::string> output_var_names;
     for (ir::Node *n : node->outputs) {
       output_var_names.push_back(n->Name());
     }
-    op_dev_id = GetAppropriateDeviceID(output_var_names);
+    auto recv_param_grad = boost::get<std::vector<std::string>>(
+        node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
+    // FIXME(typhoonzero): assume each recv op output one param
+    // Use the same place as send.
+    if (recv_param_grad.size() == 2U) {
+      op_dev_id = GetVarDeviceID(*result, recv_param_grad[1]);
+      VLOG(10) << "recv param " << recv_param_grad[0]
+               << " get grad place: " << recv_param_grad[1]
+               << " place: " << op_dev_id;
+    } else {
+      op_dev_id = GetAppropriateDeviceID(output_var_names);
+    }
     for (auto &varname : output_var_names) {
       result->Get<ShardedVarDevice>(kShardedVarDevice)
           .emplace(varname, op_dev_id);
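
In short, this hunk changes how RPC ops are placed: instead of hashing over all of a send op's input names (or a recv op's output names) via GetAppropriateDeviceID, the builder now reads the {param, grad} pair from the op's op-role-var attribute, places send on the device derived from the gradient name, and has recv reuse the placement already recorded for that gradient, falling back to the old heuristic only when the attribute does not hold such a pair.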

paddle/fluid/framework/details/multi_devices_graph_print_pass.cc

Lines changed: 2 additions & 1 deletion
@@ -54,7 +54,8 @@ void GraphvizSSAGraphPrinter::Print(const ir::Graph &graph,
     sout << "var_" << cur_var_id << " [label=\"" << var_handle_ptr->name_
          << "\\n"
          << var_handle_ptr->place_ << "\\n"
-         << var_handle_ptr->version_ << "\"]" << std::endl;
+         << "scope: " << var_handle_ptr->scope_idx_ << "\\n"
+         << "v" << var_handle_ptr->version_ << "\"]" << std::endl;
   } else if (dummy_ptr) {
     sout << "var_" << cur_var_id << " [label=\"dummy\"]" << std::endl;
   }
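
After this change each variable node's Graphviz label also carries its scope index and a "v"-prefixed version, so an emitted line would look roughly like the following (variable name and place are hypothetical):

var_3 [label="fc_0.w_0\nCUDAPlace(0)\nscope: 0\nv1"]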

paddle/fluid/framework/ir/graph_pattern_detecter_tester.cc

Lines changed: 2 additions & 2 deletions
@@ -163,8 +163,8 @@ TEST(GraphPatternDetecter, MultiSubgraph) {
   // 3. Detect op2 -> var2 -> op4
   // 4. Detect op2 -> var3 -> op5
   // But 2 and 3 and 4 overlapped, so keep 2, so the final choices are 1 and 2
-  ASSERT_GE(count, 1UL);
-  ASSERT_LE(count, 2UL);
+  ASSERT_GE(count, 1);
+  ASSERT_LE(count, 2);
 }
 
 }  // namespace ir

paddle/fluid/framework/ir/node.cc

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 namespace ir {
-const char Node::kControlDepVarName[] = "__control_var";
+constexpr char Node::kControlDepVarName[];
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle

paddle/fluid/framework/ir/node.h

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ namespace ir {
 class Node {
  public:
   enum class Type { kOperation, kVariable };
-  static const char kControlDepVarName[];
+  static constexpr char kControlDepVarName[] = "__control_var";
 
   explicit Node(const std::string& name, Type type)
       : name_(name), var_desc_(nullptr), op_desc_(nullptr), type_(type) {}
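
Together, the node.h and node.cc hunks switch kControlDepVarName to the standard pre-C++17 idiom for a constexpr static data member: the initializer moves into the class definition, while the .cc file keeps a definition without an initializer, which is still required when the member is odr-used (for example, when its address is taken). A minimal standalone sketch of the same idiom, with a hypothetical Tag class:

// tag.h -- the in-class initializer supplies the value
struct Tag {
  static constexpr char kName[] = "__control_var";
};

// tag.cc -- out-of-class definition, no initializer (needed before C++17
// whenever kName is odr-used)
constexpr char Tag::kName[];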

paddle/fluid/framework/selected_rows.cc

Lines changed: 2 additions & 2 deletions
@@ -139,7 +139,7 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown) {
   }
   auto write_iter = id_to_index_.find(key);
   if (write_iter == id_to_index_.end()) {
-    size_t row_num = rows_.size();
+    int row_num = rows_.size();
     if (row_num == value_->dims()[0]) {
       rwlock_->UNLock();
       PADDLE_THROW("selected rows is full, then length exceed %d", row_num);
@@ -182,7 +182,7 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
   PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0],
                     "output tensor should have the same shape with table "
                     "except the dims[0].");
-  for (size_t i = 0; i < ids.numel(); ++i) {
+  for (int i = 0; i < ids.numel(); ++i) {
     int64_t index = AutoGrownIndex(ids.data<int64_t>()[i], auto_grown);
     framework::VisitDataType(
         framework::ToDataType(value_->type()),
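
Both hunks swap size_t for int where the value is compared against signed quantities: dims()[0] and numel() return int64_t, and mixing signed and unsigned operands in a comparison triggers -Wsign-compare and silently converts the signed side to unsigned. A standalone sketch of the hazard, not from the commit:

#include <cstdint>
#include <cstdio>

int main() {
  int64_t numel = -1;  // a signed count, as Tensor::numel() returns
  // With a size_t counter, numel would be converted to unsigned, -1 would
  // become 2^64 - 1, and the condition would stay true:
  //   for (size_t i = 0; i < numel; ++i) { ... }  // warns: -Wsign-compare
  // With a signed counter the comparison keeps its arithmetic meaning:
  for (int i = 0; i < numel; ++i) std::puts("never reached");
  std::puts("done");
  return 0;
}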
