Skip to content

Commit 82bb917

Browse files
committed
Merge remote-tracking branch 'ups/develop' into fix/op/elewise_add
2 parents 0507f7b + 99f74be commit 82bb917

23 files changed

+376
-172
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ elseif()
212212
set(WITH_ANAKIN OFF CACHE STRING "Anakin is used in GPU only now." FORCE)
213213
endif()
214214

215+
include(flags) # set paddle compile flags
215216
include(cudnn) # set cudnn libraries, must before configure
216217
include(cupti)
217218
include(configure) # add paddle env configuration
@@ -220,7 +221,6 @@ include(package) # set paddle packages
220221
include(ccache) # set ccache for compilation
221222
include(util) # set unittest and link libs
222223
include(rdma) # set rdma libraries
223-
include(flags) # set paddle compile flags
224224
include(version) # set PADDLE_VERSION
225225
include(coveralls) # set code coverage
226226
include(inference_lib) # add paddle fluid inference libraries

cmake/configure.cmake

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,16 @@ if(NOT WITH_PROFILER)
5050
endif(NOT WITH_PROFILER)
5151

5252
if(NOT CMAKE_CROSSCOMPILING)
53-
if(WITH_AVX AND AVX_FOUND)
53+
if(WITH_AVX AND AVX512F_FOUND)
54+
set(SIMD_FLAG ${AVX512F_FLAG})
55+
elseif(WITH_AVX AND AVX2_FOUND)
56+
set(SIMD_FLAG ${AVX2_FLAG})
57+
elseif(WITH_AVX AND AVX_FOUND)
5458
set(SIMD_FLAG ${AVX_FLAG})
5559
elseif(SSE3_FOUND)
5660
set(SIMD_FLAG ${SSE3_FLAG})
5761
endif()
5862
endif()
59-
if(UNIX AND NOT APPLE)
60-
# except apple from nix*Os family
61-
set(LINUX TRUE)
62-
endif(UNIX AND NOT APPLE)
6363

6464
if(NOT WITH_GOLANG)
6565
add_definitions(-DPADDLE_WITHOUT_GOLANG)

cmake/cudnn.cmake

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,25 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS
2525
$ENV{CUDNN_ROOT}
2626
$ENV{CUDNN_ROOT}/lib64
2727
$ENV{CUDNN_ROOT}/lib
28-
/usr/lib)
29-
find_library(CUDNN_LIBRARY NAMES libcudnn.so libcudnn.dylib # libcudnn_static.a
28+
/usr/lib
29+
${CUDA_TOOLKIT_ROOT_DIR}
30+
${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
31+
)
32+
set(CUDNN_LIB_NAME "")
33+
if (LINUX)
34+
set(CUDNN_LIB_NAME "libcudnn.so")
35+
endif(LINUX)
36+
37+
if(WIN32)
38+
# only support cudnn7
39+
set(CUDNN_LIB_NAME "cudnn.lib" "cudnn64_7.dll")
40+
endif(WIN32)
41+
42+
if(Apple)
43+
set(CUDNN_LIB_NAME "libcudnn.dylib" "libcudnn.so")
44+
endif(Apple)
45+
46+
find_library(CUDNN_LIBRARY NAMES ${CUDNN_LIB_NAME} # libcudnn_static.a
3047
PATHS ${CUDNN_CHECK_LIBRARY_DIRS} ${CUDNN_INCLUDE_DIR} ${__libpath_hist}
3148
NO_DEFAULT_PATH
3249
DOC "Path to cuDNN library.")

cmake/flags.cmake

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,11 @@ else()
142142
${GPU_COMMON_FLAGS})
143143
endif()
144144

145+
if(UNIX AND NOT APPLE)
146+
# except apple from nix*Os family
147+
set(LINUX TRUE)
148+
endif(UNIX AND NOT APPLE)
149+
145150

146151
foreach(flag ${COMMON_FLAGS})
147152
safe_set_cflag(CMAKE_C_FLAGS ${flag})

cmake/simd.cmake

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID
1010
set(SSE3_FLAG "-msse3")
1111
set(AVX_FLAG "-mavx")
1212
set(AVX2_FLAG "-mavx2")
13+
set(AVX512F_FLAG "-mavx512f")
1314
elseif(MSVC)
1415
set(MMX_FLAG "/arch:MMX")
1516
set(SSE2_FLAG "/arch:SSE2")
@@ -81,5 +82,16 @@ int main()
8182
return 0;
8283
}" AVX2_FOUND)
8384

85+
# Check AVX512F
86+
set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG})
87+
set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
88+
CHECK_CXX_SOURCE_RUNS("
89+
#include <immintrin.h>
90+
int main()
91+
{
92+
__m512i a = _mm512_undefined_epi32();
93+
return 0;
94+
}" AVX512F_FOUND)
95+
8496
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED})
85-
mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)
97+
mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND)

paddle/fluid/framework/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,13 @@ else()
9999
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method)
100100
endif()
101101

102-
102+
if (NOT WIN32)
103103
cc_library(parallel_executor SRCS parallel_executor.cc DEPS
104104
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
105105
graph graph_viz_pass multi_devices_graph_pass
106106
multi_devices_graph_print_pass multi_devices_graph_check_pass
107107
fast_threaded_ssa_graph_executor)
108+
endif() # NOT WIN32
108109

109110
cc_library(prune SRCS prune.cc DEPS framework_proto)
110111
cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)

paddle/fluid/framework/program_desc.cc

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,20 @@ ProgramDesc::ProgramDesc(const ProgramDesc &o) {
5555
auto all_ops = blocks_[block_id]->AllOps();
5656
for (size_t op_id = 0; op_id < all_ops.size(); ++op_id) {
5757
auto &op = all_ops[op_id];
58+
5859
for (const std::string &attr_name : op->AttrNames()) {
5960
if (op->GetAttrType(attr_name) == proto::AttrType::BLOCK) {
6061
int sub_block_id =
6162
o.Block(block_id).Op(op_id)->GetBlockAttrId(attr_name);
6263
op->SetBlockAttr(attr_name, MutableBlock(sub_block_id));
64+
} else if (op->GetAttrType(attr_name) == proto::AttrType::BLOCKS) {
65+
std::vector<int> sub_block_ids =
66+
o.Block(block_id).Op(op_id)->GetBlocksAttrIds(attr_name);
67+
std::vector<BlockDesc *> block_descs;
68+
for (int block_id : sub_block_ids) {
69+
block_descs.push_back(MutableBlock(block_id));
70+
}
71+
op->SetBlocksAttr(attr_name, block_descs);
6372
}
6473
}
6574
}
@@ -68,24 +77,16 @@ ProgramDesc::ProgramDesc(const ProgramDesc &o) {
6877

6978
ProgramDesc::ProgramDesc(const proto::ProgramDesc &desc) {
7079
desc_ = desc;
71-
for (auto &block_desc : *desc_.mutable_blocks()) {
72-
blocks_.emplace_back(new BlockDesc(this, &block_desc));
73-
}
74-
for (auto &block : blocks_) {
75-
for (auto *op : block->AllOps()) {
76-
for (const auto &attr : op->Proto()->attrs()) {
77-
if (attr.type() == proto::AttrType::BLOCK) {
78-
size_t blk_idx = attr.block_idx();
79-
op->SetBlockAttr(attr.name(), this->MutableBlock(blk_idx));
80-
}
81-
}
82-
}
83-
}
80+
InitFromProto();
8481
}
8582

8683
ProgramDesc::ProgramDesc(const std::string &binary_str) {
8784
PADDLE_ENFORCE(desc_.ParseFromString(binary_str),
8885
"Fail to parse program_desc from binary string.");
86+
InitFromProto();
87+
}
88+
89+
void ProgramDesc::InitFromProto() {
8990
for (auto &block_desc : *desc_.mutable_blocks()) {
9091
blocks_.emplace_back(new BlockDesc(this, &block_desc));
9192
}
@@ -95,6 +96,13 @@ ProgramDesc::ProgramDesc(const std::string &binary_str) {
9596
if (attr.type() == proto::AttrType::BLOCK) {
9697
size_t blk_idx = attr.block_idx();
9798
op->SetBlockAttr(attr.name(), this->MutableBlock(blk_idx));
99+
} else if (attr.type() == proto::AttrType::BLOCKS) {
100+
auto blks_idx = attr.blocks_idx();
101+
std::vector<BlockDesc *> block_descs;
102+
for (int blk_idx : blks_idx) {
103+
block_descs.push_back(this->MutableBlock(blk_idx));
104+
}
105+
op->SetBlocksAttr(attr.name(), block_descs);
98106
}
99107
}
100108
}

paddle/fluid/framework/program_desc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ class ProgramDesc {
7676
void SetFetchHolderName(const std::string &fetch_holder_name);
7777

7878
private:
79+
void InitFromProto();
80+
7981
proto::ProgramDesc desc_;
8082

8183
std::vector<std::unique_ptr<BlockDesc>> blocks_;

paddle/fluid/framework/program_desc_test.cc

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,19 @@ TEST(ProgramDesc, copy_ctor) {
4242
out->SetType(proto::VarType::LOD_TENSOR);
4343
op->SetOutput("Y", {out->Name()});
4444

45+
BlockDesc* new_block = program.AppendBlock(*global_block);
46+
op = new_block->AppendOp();
47+
op->SetType("mul");
48+
49+
op = global_block->AppendOp();
50+
op->SetType("op_with_subblock");
51+
op->SetAttr("sub_block", new_block);
52+
53+
std::vector<BlockDesc*> sub_blocks;
54+
sub_blocks.push_back(program.AppendBlock(*global_block));
55+
sub_blocks.push_back(program.AppendBlock(*global_block));
56+
op->SetAttr("sub_blocks", sub_blocks);
57+
4558
ProgramDesc program_copy(program);
4659

4760
auto* global_block_copy = program_copy.MutableBlock(0);
@@ -64,6 +77,8 @@ TEST(ProgramDesc, copy_ctor) {
6477
assert_same_var("Y", y);
6578
assert_same_var("Out", out);
6679

80+
bool found_sub_block = false;
81+
bool found_sub_blocks = false;
6782
for (size_t i = 0; i < global_block->OpSize(); ++i) {
6883
auto op_origin = global_block->Op(i);
6984
auto op_copy = global_block_copy->Op(i);
@@ -74,8 +89,17 @@ TEST(ProgramDesc, copy_ctor) {
7489

7590
ASSERT_EQ(op_copy->Proto()->SerializeAsString(),
7691
op_origin->Proto()->SerializeAsString());
77-
}
7892

93+
if (op->Type() == "op_with_subblock") {
94+
ASSERT_EQ(1, op->GetBlockAttrId("sub_block"));
95+
found_sub_block = true;
96+
97+
ASSERT_EQ(2, op->GetBlocksAttrIds("sub_blocks").size());
98+
found_sub_blocks = true;
99+
}
100+
}
101+
ASSERT_TRUE(found_sub_block);
102+
ASSERT_TRUE(found_sub_blocks);
79103
// Not check block's protostr are same it because the order of vars could be
80104
// different and it is correct.
81105
}

paddle/fluid/inference/api/high_level_api_cn.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,13 @@ config.model_dir = "xxx";
6565
config.use_gpu = false;
6666
// 创建一个原生的 PaddlePredictor
6767
auto predictor =
68-
paddle::CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
68+
paddle::CreatePaddlePredictor<paddle::NativeConfig, paddle::PaddleEngineKind::kNative>(config);
6969
// 创建输入 tensor
7070
int64_t data[4] = {1, 2, 3, 4};
7171
paddle::PaddleTensor tensor{.name = "",
7272
.shape = std::vector<int>({4, 1}),
73-
.data = PaddleBuf(data, sizeof(data)),
74-
.dtype = PaddleDType::INT64};
73+
.data = paddle::PaddleBuf(data, sizeof(data)),
74+
.dtype = paddle::PaddleDType::INT64};
7575
// 创建输出 tensor,输出 tensor 的内存可以复用
7676
std::vector<paddle::PaddleTensor> outputs;
7777
// 执行预测

0 commit comments

Comments (0)