Skip to content

Commit c2bc700

Browse files
committed
Merge remote-tracking branch 'origin/develop' into fix/mem_opt
2 parents 8ee3bdb + aa6b2bd commit c2bc700

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+1889
-469
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,4 @@ third_party/
2828
build_*
2929
# clion workspace.
3030
cmake-build-*
31+
model_test

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ option(WITH_ANAKIN "Compile with Anakin library" OFF)
6969
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
7070
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
7171
option(WITH_INFERENCE "Compile fluid inference library" ON)
72+
option(ON_INFER "Turn on inference optimization." OFF)
7273
option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF)
7374
option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
7475
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
@@ -179,6 +180,7 @@ include(external/eigen) # download eigen3
179180
include(external/pybind11) # download pybind11
180181
include(external/cares)
181182
include(external/cub)
183+
include(external/xxhash) # download xxhash
182184

183185
if (NOT WIN32)
184186
# there is no official support of snappystream, warpctc, nccl, cupti in windows
@@ -301,3 +303,8 @@ if(WITH_DOC)
301303
find_python_module(recommonmark REQUIRED)
302304
add_subdirectory(doc)
303305
endif()
306+
307+
# Inference-only build switch: when ON_INFER is enabled, every target compiled
# below this point sees the PADDLE_ON_INFERENCE preprocessor definition, and a
# configure-time warning reminds the user that this mode is active.
if(ON_INFER)
  add_definitions(-DPADDLE_ON_INFERENCE)
  message(WARNING "On inference mode, will take place some specific optimization.")
endif()

Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,14 +75,14 @@ RUN pip3 install -U wheel && \
7575
pip3 install -U docopt PyYAML sphinx==1.5.6 && \
7676
pip3 install sphinx-rtd-theme==0.1.9 recommonmark && \
7777
easy_install -U pip && \
78-
pip install -U wheel && \
78+
pip install -U pip setuptools wheel && \
7979
pip install -U docopt PyYAML sphinx==1.5.6 && \
8080
pip install sphinx-rtd-theme==0.1.9 recommonmark
8181

82-
RUN pip3 install pre-commit 'ipython==5.3.0' && \
82+
RUN pip3 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
8383
pip3 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
8484
pip3 install opencv-python && \
85-
pip install pre-commit 'ipython==5.3.0' && \
85+
pip install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
8686
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
8787
pip install opencv-python
8888

cmake/external/xxhash.cmake

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Downloads, builds and installs xxHash (tag v0.6.5) as an external project and
# exposes it to the rest of the build as the imported static library `xxhash`.
#
# Variables set for consumers:
#   XXHASH_SOURCE_DIR   - ExternalProject prefix (download/build tree)
#   XXHASH_INSTALL_DIR  - install prefix handed to xxHash's Makefile
#   XXHASH_INCLUDE_DIR  - ${XXHASH_INSTALL_DIR}/include
#   XXHASH_LIBRARIES    - full path of the installed libxxhash.a
include(ExternalProject)

set(XXHASH_SOURCE_DIR ${THIRD_PARTY_PATH}/xxhash)
set(XXHASH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/xxhash)
set(XXHASH_INCLUDE_DIR "${XXHASH_INSTALL_DIR}/include")
set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/libxxhash.a")

# xxHash is built in-source with its own Makefile. Unless WITH_STATIC_LIB is
# set, the Makefile is patched with sed so that -fPIC is appended next to its
# warning flags before `make lib` runs (presumably so the archive can be linked
# into shared objects -- confirm against the consumers of the xxhash target).
if(WITH_STATIC_LIB)
  set(BUILD_CMD make lib)
else()
  set(BUILD_CMD sed -i "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" ${XXHASH_SOURCE_DIR}/src/extern_xxhash/Makefile && make lib)
endif()

ExternalProject_Add(
  extern_xxhash
  ${EXTERNAL_PROJECT_LOG_ARGS}
  GIT_REPOSITORY    "https://github.com/Cyan4973/xxHash"
  GIT_TAG           "v0.6.5"
  PREFIX            ${XXHASH_SOURCE_DIR}
  DOWNLOAD_NAME     "xxhash"
  UPDATE_COMMAND    ""
  CONFIGURE_COMMAND ""
  BUILD_IN_SOURCE   1
  # Explicitly empty, like the other disabled steps, instead of a bare keyword.
  PATCH_COMMAND     ""
  BUILD_COMMAND     ${BUILD_CMD}
  # Pass the prefix as a make variable rather than through the shell builtin
  # `export`, so the step does not depend on being run by a POSIX shell.
  INSTALL_COMMAND   make PREFIX=${XXHASH_INSTALL_DIR} install
  TEST_COMMAND      ""
)

# Make <xxhash.h> visible to every target configured after this file.
include_directories(${XXHASH_INCLUDE_DIR})

# Imported target wrapping the installed archive. add_dependencies makes sure
# the external project has been built before anything that depends on `xxhash`.
add_library(xxhash STATIC IMPORTED GLOBAL)
set_property(TARGET xxhash PROPERTY IMPORTED_LOCATION ${XXHASH_LIBRARIES})
add_dependencies(xxhash extern_xxhash)

list(APPEND external_project_dependencies xxhash)

# With the C API enabled, also install the headers and the static library
# (into an ABI-specific lib directory on Android).
if(WITH_C_API)
  install(DIRECTORY ${XXHASH_INCLUDE_DIR} DESTINATION third_party/xxhash)
  if(ANDROID)
    install(FILES ${XXHASH_LIBRARIES} DESTINATION third_party/xxhash/lib/${ANDROID_ABI})
  else()
    install(FILES ${XXHASH_LIBRARIES} DESTINATION third_party/xxhash/lib)
  endif()
endif()

cmake/inference_lib.cmake

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414

1515
# make package for paddle fluid shared and static library
1616
function(copy TARGET)
17+
if (NOT ON_INFER)
18+
message(WARNING "Turn on the ON_INFER flag when building inference_lib only.")
19+
endif()
1720
set(options "")
1821
set(oneValueArgs "")
1922
set(multiValueArgs SRCS DSTS DEPS)
@@ -31,7 +34,7 @@ function(copy TARGET)
3134
foreach(index RANGE ${len})
3235
list(GET copy_lib_SRCS ${index} src)
3336
list(GET copy_lib_DSTS ${index} dst)
34-
add_custom_command(TARGET ${TARGET} PRE_BUILD
37+
add_custom_command(TARGET ${TARGET} PRE_BUILD
3538
COMMAND mkdir -p "${dst}"
3639
COMMAND cp -r "${src}" "${dst}"
3740
COMMENT "copying ${src} -> ${dst}")
@@ -67,6 +70,13 @@ copy(boost_lib
6770
DEPS boost
6871
)
6972

73+
# Copy the xxhash headers and static library into the fluid install tree:
# the include directory goes to ${dst_dir}, the archive to ${dst_dir}/lib.
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/xxhash")
copy(xxhash_lib
  SRCS ${XXHASH_INCLUDE_DIR} ${XXHASH_LIBRARIES}
  DSTS ${dst_dir} ${dst_dir}/lib
  DEPS xxhash)
79+
7080
if(NOT PROTOBUF_FOUND)
7181
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/protobuf")
7282
copy(protobuf_lib
@@ -186,7 +196,7 @@ copy(cmake_cache
186196
DSTS ${FLUID_INSTALL_DIR})
187197

188198
# This command generates a complete fluid library for both train and inference
189-
add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
199+
add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
190200

191201
# Following commands generate a inference-only fluid library
192202
# third_party, version.txt and CMakeCache.txt are the same position with ${FLUID_INSTALL_DIR}

paddle/fluid/API.spec

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ paddle.fluid.layers.reduce_prod ArgSpec(args=['input', 'dim', 'keep_dim', 'name'
8686
paddle.fluid.layers.sequence_first_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
8787
paddle.fluid.layers.sequence_last_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
8888
paddle.fluid.layers.sequence_slice ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
89-
paddle.fluid.layers.dropout ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
89+
paddle.fluid.layers.dropout ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer'))
9090
paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None))
9191
paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
9292
paddle.fluid.layers.edit_distance ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None))
@@ -107,7 +107,7 @@ paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label',
107107
paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None))
108108
paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None)
109109
paddle.fluid.layers.autoincreased_step_counter ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1))
110-
paddle.fluid.layers.reshape ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, True, None))
110+
paddle.fluid.layers.reshape ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, False, None))
111111
paddle.fluid.layers.squeeze ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,))
112112
paddle.fluid.layers.unsqueeze ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,))
113113
paddle.fluid.layers.lod_reset ArgSpec(args=['x', 'y', 'target_lod'], varargs=None, keywords=None, defaults=(None, None))
@@ -174,7 +174,9 @@ paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None
174174
paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None))
175175
paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
176176
paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
177+
paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
177178
paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None))
179+
paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None))
178180
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
179181
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
180182
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)

paddle/fluid/framework/details/multi_devices_graph_pass.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -252,9 +252,9 @@ std::vector<ir::Node *> SortOpsAndDelayOptimizeOp(const ir::Graph &graph) {
252252
std::vector<ir::Node *> sorted_ret;
253253
for (size_t i = 0; i < ret.size(); ++i) {
254254
if (i < last_backward) {
255-
if (boost::get<int>(ret[i]->Op()->GetAttr(
256-
OpProtoAndCheckerMaker::OpRoleAttrName())) ==
257-
static_cast<int>(OpRole::kOptimize)) {
255+
if (static_cast<bool>(boost::get<int>(ret[i]->Op()->GetAttr(
256+
OpProtoAndCheckerMaker::OpRoleAttrName())) &
257+
static_cast<int>(OpRole::kOptimize))) {
258258
optimize_ops.push_back(ret[i]);
259259
} else {
260260
sorted_ret.push_back(ret[i]);

paddle/fluid/framework/ir/graph_helper.cc

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,19 +120,25 @@ size_t GraphNum(const Graph &graph) {
120120
std::deque<ir::Node *> q_nodes;
121121
std::vector<std::unordered_set<ir::Node *>> graph_nodes;
122122
std::unordered_set<ir::Node *> g_nodes;
123+
// q_set tracks the nodes that are currently in the queue.
124+
std::unordered_set<ir::Node *> q_set;
123125
size_t graph_count = 0;
124126

125-
auto traverse_nodes = [&visited_nodes,
126-
&q_nodes](const std::vector<ir::Node *> &nodes) {
127-
std::copy_if(
128-
nodes.begin(), nodes.end(), std::back_inserter(q_nodes),
129-
[&visited_nodes](Node *node) { return !visited_nodes.count(node); });
127+
auto traverse_nodes = [&visited_nodes, &q_nodes,
128+
&q_set](const std::vector<ir::Node *> &nodes) {
129+
for (auto n : nodes) {
130+
if (visited_nodes.count(n) == 0 && q_set.count(n) == 0) {
131+
q_nodes.push_back(n);
132+
q_set.insert(n);
133+
}
134+
}
130135
};
131136

132137
while (visited_nodes.size() != nodes.size()) {
133138
if (!q_nodes.empty()) {
134139
auto cur_node = q_nodes.front();
135140
q_nodes.pop_front();
141+
q_set.erase(cur_node);
136142
visited_nodes.insert(cur_node);
137143
g_nodes.insert(cur_node);
138144
traverse_nodes(cur_node->inputs);
@@ -146,6 +152,7 @@ size_t GraphNum(const Graph &graph) {
146152
for (auto &n : nodes) {
147153
if (visited_nodes.count(n) == 0) {
148154
q_nodes.push_back(n);
155+
q_set.insert(n);
149156
break;
150157
}
151158
}

paddle/fluid/framework/lod_tensor_array.h

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,82 @@ limitations under the License. */
1818

1919
namespace paddle {
2020
namespace framework {
21+
22+
// NOTE The vector<LoDTensor> can't be replaced with the class LoDTensorArray
23+
// directly, because there are many vector<LoDTensor> used across the project,
24+
// and some of them are treated as LoDTensorArray.
25+
#if !defined(PADDLE_ON_INFERENCE)
26+
2127
using LoDTensorArray = std::vector<LoDTensor>;
22-
}
28+
29+
#else // !PADDLE_ON_INFERENCE
30+
31+
#pragma message "LoDTensorArray is replaced with the inference one."
32+
/*
33+
* A LoDTensorArray which will not deallocate buffer when resized, fix the data
34+
* diff in inference, and more performance friendly in the concurrency
35+
* scenerios.
36+
*/
37+
class LoDTensorArray {
38+
public:
39+
LoDTensorArray() = default;
40+
41+
using iterator = std::vector<LoDTensor>::iterator;
42+
using const_iterator = std::vector<LoDTensor>::const_iterator;
43+
44+
const_iterator begin() const { return array_.begin(); }
45+
const_iterator end() const { return array_.begin() + size_; }
46+
iterator begin() { return array_.begin(); }
47+
iterator end() { return array_.begin() + size_; }
48+
49+
void push_back(const LoDTensor& x) {
50+
if (size_ < array_.size()) {
51+
array_[size_++] = x;
52+
} else {
53+
array_.push_back(x);
54+
++size_;
55+
}
56+
}
57+
void resize(size_t size) {
58+
if (array_.size() < size) {
59+
array_.resize(size);
60+
}
61+
size_ = size;
62+
}
63+
64+
void emplace_back() { array_.emplace_back(); }
65+
66+
void emplace_back(LoDTensor&& x) { array_.emplace_back(std::move(x)); }
67+
68+
LoDTensor& back() { return array_.back(); }
69+
70+
size_t space() const { return array_.size(); }
71+
72+
void reserve(size_t size) {
73+
// Naive warning to tell user this array might be to large. The memory and
74+
// buffer used by this TensorArray will not be deleted during the training
75+
// and inference phase, so attention not to make it expand too long.
76+
if (size > 800UL) {
77+
LOG(WARNING) << "TensorArray has more than 800 items";
78+
}
79+
array_.reserve(size);
80+
}
81+
82+
bool empty() const { return size_ == 0UL; }
83+
void clear() { size_ = 0UL; }
84+
85+
LoDTensor& operator[](size_t id) { return array_[id]; }
86+
const LoDTensor& operator[](size_t id) const { return array_[id]; }
87+
LoDTensor& at(size_t id) { return array_.at(id); }
88+
const LoDTensor& at(size_t id) const { return array_.at(id); }
89+
90+
size_t size() const { return size_; }
91+
92+
private:
93+
size_t size_{0};
94+
std::vector<LoDTensor> array_;
95+
};
96+
#endif // !PADDLE_ON_INFERENCE
97+
98+
} // namespace framework
2399
} // namespace paddle

paddle/fluid/framework/mixed_vector.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,33 @@ class CPUVector : public std::vector<T, std::allocator<T>> {
542542
this->reserve(this->size() + size_t(end - begin));
543543
this->insert(this->end(), begin, end);
544544
}
545+
546+
// CUDA accessor stub: always raises via PADDLE_THROW; `place` is not used.
const T *CUDAData(platform::Place place) const {
  PADDLE_THROW("Vector::CUDAData() method is not supported in CPU-only version");
}
550+
551+
// Mutable CUDA accessor stub: always raises via PADDLE_THROW; `place` is not
// used.
T *CUDAMutableData(platform::Place place) {
  PADDLE_THROW(
      "Vector::CUDAMutableData() method is not supported in CPU-only "
      "version");
}
556+
557+
// Read-only pointer to the vector's storage. Enforces that `place` is a CPU
// place before handing out the pointer.
const T *Data(platform::Place place) const {
  PADDLE_ENFORCE(platform::is_cpu_place(place),
                 "Vector::Data() method is not supported when not in CPUPlace");
  const T *cpu_ptr = this->data();
  return cpu_ptr;
}
563+
564+
// Writable pointer to the vector's storage. Enforces that `place` is a CPU
// place before handing out the pointer.
T *MutableData(platform::Place place) {
  PADDLE_ENFORCE(
      platform::is_cpu_place(place),
      "Vector::MutableData() method is not supported when not in CPUPlace");
  T *cpu_ptr = this->data();
  return cpu_ptr;
}
570+
571+
// Opaque handle for this vector: the object's own address as a const void*.
const void *Handle() const {
  const void *self = this;
  return self;
}
545572
};
546573

547574
template <typename T>

0 commit comments

Comments
 (0)