Skip to content

Commit 16f0994

Browse files
committed
Merge develop
test=develop
2 parents 63651c1 + 57dc3c1 commit 16f0994

File tree

179 files changed

+9064
-990
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

179 files changed

+9064
-990
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
6464
option(WITH_PSLIB "Compile with pslib support" OFF)
6565
option(WITH_CONTRIB "Compile the third-party contributation" OFF)
6666
option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
67+
# TODO(Superjomn) Remove the WITH_ANAKIN option if it is not needed later.
6768
option(WITH_ANAKIN "Compile with Anakin library" OFF)
6869
option(ANAKIN_BUILD_FAT_BIN "Build anakin cuda fat-bin lib for all device plantform, ignored when WITH_ANAKIN=OFF" OFF)
6970
option(ANAKIN_BUILD_CROSS_PLANTFORM "Build anakin lib for any nvidia device plantform. ignored when WITH_ANAKIN=OFF" ON)
@@ -190,6 +191,7 @@ include(configure) # add paddle env configuration
190191
if(WITH_GPU)
191192
include(cuda)
192193
include(tensorrt)
194+
include(anakin_subgraph)
193195
endif()
194196
if(WITH_MKL OR WITH_MKLML)
195197
include(external/anakin)

CONTRIBUTING.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ python \
156156

157157
This will enable VLOG messages generated by `buddy_allocator.{h,cc}` in the verbose range of 0 to 3, so you will see the example VLOG message above, which is at level 3. This suggests that we output overall messages at lower verbose levels, so that they are displayed with higher probability. When coding in C++, please follow the verbose level conventions below:
158158

159-
- verbose level 1: [framework](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework)
160-
- verbose level 3: [operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)
161-
- verbose level 5: [memory](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/memory), [platform](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/platform)
162-
- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/legacy/math)
159+
- verbose level 1: [framework](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/framework)
160+
- verbose level 3: [operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/operators)
161+
- verbose level 5: [memory](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/memory), [platform](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/platform)
162+
- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/operators/math/)

cmake/anakin_subgraph.cmake

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Locates an Anakin installation (header anakin_config.h plus the Anakin
# shared library) and, when found, adds its include/link directories and
# defines PADDLE_WITH_ANAKIN. The whole script is skipped without WITH_GPU.
if(NOT WITH_GPU)
2+
return()
3+
endif()
4+
5+
# ANAKIN_ROOT is a cache variable defaulting to /usr; it can be overridden on
# the cmake command line.
set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")
6+
# Look for anakin_config.h only under ANAKIN_ROOT (cache variable or
# environment variable); NO_DEFAULT_PATH disables the system search paths.
find_path(ANAKIN_INCLUDE_DIR anakin_config.h
7+
PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include
8+
$ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/include
9+
NO_DEFAULT_PATH
10+
)
11+
12+
# Look for the Anakin shared library in the same restricted set of locations.
find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so
13+
PATHS ${ANAKIN_ROOT}
14+
$ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/lib
15+
NO_DEFAULT_PATH
16+
DOC "Path to ANAKIN library.")
17+
18+
# ANAKIN_FOUND is set ON only when both the header and the library were found
# AND WITH_DSO is enabled. When both are found but WITH_DSO is off,
# ANAKIN_FOUND is not set by this script at all.
# NOTE(review): presumably leaving it unset is intended to behave like OFF --
# confirm no earlier script defines ANAKIN_FOUND.
if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY)
19+
if(WITH_DSO)
20+
set(ANAKIN_FOUND ON)
21+
endif(WITH_DSO)
22+
else()
23+
set(ANAKIN_FOUND OFF)
24+
endif()
25+
26+
# NOTE(review): the directories added below are derived from ANAKIN_ROOT, not
# from the discovered ANAKIN_INCLUDE_DIR / ANAKIN_LIBRARY. If the files were
# found via $ENV{ANAKIN_ROOT} these paths may not match -- confirm.
if(ANAKIN_FOUND)
27+
message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")
28+
include_directories(${ANAKIN_ROOT}/include)
29+
include_directories(${ANAKIN_ROOT}/include/saber)
30+
link_directories(${ANAKIN_ROOT})
31+
add_definitions(-DPADDLE_WITH_ANAKIN)
32+
endif()

cmake/tensorrt.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,6 @@ if(TENSORRT_FOUND)
3333
message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
3434
"Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
3535
include_directories(${TENSORRT_INCLUDE_DIR})
36+
link_directories(${TENSORRT_LIBRARY})
3637
add_definitions(-DPADDLE_WITH_TENSORRT)
3738
endif()

paddle/fluid/API.spec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,7 @@ paddle.fluid.unique_name.guard (ArgSpec(args=['new_generator'], varargs=None, ke
520520
paddle.fluid.recordio_writer.convert_reader_to_recordio_file (ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)), ('document', '65c7523e86f0c50bb729b01667f36310'))
521521
paddle.fluid.recordio_writer.convert_reader_to_recordio_files (ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)), ('document', 'bc643f0f5f1b9db57ff0d8a57d379bd7'))
522522
paddle.fluid.Scope Scope() -> paddle.fluid.core._Scope
523+
paddle.fluid.install_check.run_check (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '66b7c84a17ed32fec2df9628367be2b9'))
523524
paddle.reader.cache (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', '1676886070eb607cb608f7ba47be0d3c'))
524525
paddle.reader.map_readers (ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None), ('document', '77cbadb09df588e21e5cc0819b69c87d'))
525526
paddle.reader.buffered (ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None), ('document', '0d6186f109feceb99f60ec50a0a624cb'))

paddle/fluid/framework/details/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ cc_library(scale_loss_grad_op_handle SRCS scale_loss_grad_op_handle.cc DEPS op_h
55
cc_library(fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory)
66
cc_library(computation_op_handle SRCS computation_op_handle.cc DEPS framework_proto scope place operator op_registry)
77
cc_library(rpc_op_handle SRCS rpc_op_handle.cc DEPS framework_proto scope place operator op_registry)
8+
cc_library(fetch_barrier_op_handle SRCS fetch_barrier_op_handle.cc DEPS framework_proto scope place operator op_registry)
89

910
cc_library(multi_devices_helper SRCS multi_devices_helper.cc DEPS graph graph_helper)
1011
cc_library(multi_devices_graph_print_pass SRCS multi_devices_graph_print_pass.cc DEPS multi_devices_helper)
@@ -72,7 +73,7 @@ cc_library(sequential_execution_pass SRCS sequential_execution_pass.cc DEPS grap
7273
cc_library(all_reduce_deps_pass SRCS all_reduce_deps_pass.cc DEPS graph graph_helper pass)
7374

7475
cc_library(multi_devices_graph_pass SRCS multi_devices_graph_pass.cc DEPS multi_devices_helper computation_op_handle
75-
scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle fused_broadcast_op_handle)
76+
scale_loss_grad_op_handle rpc_op_handle fetch_barrier_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle fused_broadcast_op_handle)
7677

7778
cc_library(fuse_all_reduce_op_pass SRCS fuse_all_reduce_op_pass.cc DEPS graph graph_helper fused_all_reduce_op_handle)
7879

paddle/fluid/framework/details/all_reduce_deps_pass.cc

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
// limitations under the License.
1414

1515
#include <algorithm>
16+
#include <memory>
1617
#include <string>
1718
#include <unordered_map>
1819
#include <unordered_set>
@@ -52,13 +53,28 @@ std::unique_ptr<ir::Graph> AllReduceDepsPass::ApplyImpl(
5253
// Note that must assert topology sort is stable
5354
auto& ops = graph->Get<const std::vector<OpDesc*>>(kStaleProgramOpDescs);
5455
for (auto* op_desc : ops) {
55-
auto outputs = op_desc->Outputs();
56-
for (auto& o_it : outputs) {
57-
for (auto& v : o_it.second) { // values
58-
vars[v] = order;
56+
try {
57+
bool is_bk_op =
58+
static_cast<bool>(boost::get<int>(op_desc->GetAttr(
59+
OpProtoAndCheckerMaker::OpRoleAttrName())) &
60+
static_cast<int>(OpRole::kBackward));
61+
if (!is_bk_op) continue;
62+
63+
auto backward_vars =
64+
boost::get<std::vector<std::string>>(op_desc->GetNullableAttr(
65+
OpProtoAndCheckerMaker::OpRoleVarAttrName()));
66+
PADDLE_ENFORCE_EQ(backward_vars.size() % 2, 0);
67+
68+
auto outputs = op_desc->Outputs();
69+
for (auto& o_it : outputs) {
70+
for (auto& v : o_it.second) { // values
71+
vars[v] = order;
72+
VLOG(1) << "in all_reduce_deps_pass:" << v;
73+
}
5974
}
75+
order++;
76+
} catch (boost::bad_get e) {
6077
}
61-
order++;
6278
}
6379

6480
std::vector<OpHandleBase*> dist_ops;
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/fluid/framework/details/fetch_barrier_op_handle.h"
16+
17+
#include <string>
18+
19+
namespace paddle {
20+
namespace framework {
21+
namespace details {
22+
// Constructs the handle for a fetch_barrier op node.
//
// - node:         graph node whose Op() description is used to create the
//                 wrapped operator through OpRegistry::CreateOp.
// - local_scopes: scopes to keep; element 0 becomes run_scope_.
// - places:       places to keep; element 0 becomes place_.
//
// Both vectors are indexed at [0] without an emptiness check here, so callers
// must pass at least one scope and one place.
FetchBarrierOpHandle::FetchBarrierOpHandle(
23+
ir::Node *node, const std::vector<Scope *> &local_scopes,
24+
const std::vector<platform::Place> &places)
25+
// The fetch_barrier op always runs on place0, but outputs on all places.
26+
: OpHandleBase(node),
27+
op_(framework::OpRegistry::CreateOp(*node->Op())),
28+
local_scopes_(local_scopes),
29+
places_(places),
30+
run_scope_(local_scopes[0]),
31+
place_(places[0]) {
32+
// Register a device context for every place, not only for place_.
for (auto &p : places) {
33+
this->SetDeviceContext(p, platform::DeviceContextPool::Instance().Get(p));
34+
}
35+
}
36+
37+
// Unconditionally reports this handle as a multi-device transfer.
bool FetchBarrierOpHandle::IsMultiDeviceTransfer() {
38+
// Override IsMultiDeviceTransfer so that it always returns true.
39+
return true;
40+
}
41+
42+
// Runs the wrapped operator once on place_.
//
// Waits for the input variables on place_ first, then runs the operator
// either directly or through RunAndRecordEvent, depending on
// is_lock_and_record_event_free_.
void FetchBarrierOpHandle::RunImpl() {
43+
WaitInputVarGenerated(place_);
44+
45+
// The operator runs in the Scope* stored in run_scope_ under
// kLocalExecScopeName. The FindVar result is dereferenced without a null
// check, so that variable must already exist.
auto run_func = [this]() {
46+
op_->Run(*run_scope_->FindVar(kLocalExecScopeName)->Get<Scope *>(), place_);
47+
};
48+
49+
if (is_lock_and_record_event_free_) {
50+
run_func();
51+
} else {
52+
this->RunAndRecordEvent(run_func);
53+
}
54+
}
55+
56+
// Decides whether this handle must wait on the given input variable.
//
// Returns true only when in_var is non-null, has a generating op, and that
// op's device context for place_ differs from this handle's own device
// context for place_. Returns false otherwise.
bool FetchBarrierOpHandle::NeedWait(VarHandleBase *in_var) {
57+
bool need_wait =
58+
in_var && in_var->GeneratedOp() &&
59+
in_var->GeneratedOp()->DeviceContext(place_) != dev_ctxes_.at(place_);
60+
return need_wait;
61+
}
62+
63+
// Returns the type string of the wrapped operator as this handle's name.
std::string FetchBarrierOpHandle::Name() const { return op_->Type(); }
64+
} // namespace details
65+
} // namespace framework
66+
} // namespace paddle
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include <memory>
18+
#include <string>
19+
#include <vector>
20+
21+
#include "paddle/fluid/framework/details/op_handle_base.h"
22+
#include "paddle/fluid/framework/feed_fetch_type.h"
23+
#include "paddle/fluid/framework/op_registry.h"
24+
#include "paddle/fluid/framework/scope.h"
25+
#include "paddle/fluid/platform/device_context.h"
26+
27+
namespace paddle {
28+
namespace framework {
29+
namespace details {
30+
31+
// **NOTE**: the fetch_barrier op is special: it outputs all received
32+
// variables on all places, so when there are multiple places it must be
33+
// initialized with multiple dev_ctxes_!
34+
35+
// Op handle that wraps a single operator created from a graph node and runs
// it on the first of the given places.
struct FetchBarrierOpHandle : public OpHandleBase {
36+
public:
37+
// node: graph node describing the op; local_scopes / places: per-device
// scopes and places (the first element of each is used for execution).
FetchBarrierOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
38+
const std::vector<platform::Place> &places);
39+
40+
// Overrides the base-class query of the same name.
bool IsMultiDeviceTransfer() override;
41+
42+
// Name of this handle.
std::string Name() const override;
43+
44+
protected:
45+
// Executes the wrapped operator.
void RunImpl() override;
46+
47+
// Whether this handle has to wait on the given input variable.
bool NeedWait(VarHandleBase *in_var) override;
48+
49+
private:
50+
// The operator this handle runs; owned by the handle.
std::unique_ptr<OperatorBase> op_;
51+
// Copies of the scope and place lists passed to the constructor.
std::vector<Scope *> local_scopes_;
52+
std::vector<platform::Place> places_;
53+
// Scope and place used for execution.
Scope *run_scope_;
54+
platform::Place place_;
55+
56+
// Defaults to false; nothing in this declaration changes it.
// NOTE(review): presumably toggled elsewhere or reserved for later use --
// confirm.
bool is_lock_and_record_event_free_{false};
57+
};
58+
59+
} // namespace details
60+
} // namespace framework
61+
} // namespace paddle

0 commit comments

Comments
 (0)