Commit 7b9d9d7

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add-code-exp

2 parents: 9aa65bb + 6224e61

Note: large commits have some content hidden by default, so only a subset of the 53 changed files appears below.

53 files changed: +1252 -153 lines

Dockerfile
Lines changed: 39 additions & 0 deletions

@@ -22,6 +22,27 @@ ENV HOME /root
 # Add bash enhancements
 COPY ./paddle/scripts/docker/root/ /root/
 
+# Prepare packages for Python
+RUN apt-get update && \
+    apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev \
+    libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev \
+    xz-utils tk-dev libffi-dev liblzma-dev
+
+# Install Python3.6
+RUN mkdir -p /root/python_build/ && wget -q https://www.sqlite.org/2018/sqlite-autoconf-3250300.tar.gz && \
+    tar -zxf sqlite-autoconf-3250300.tar.gz && cd sqlite-autoconf-3250300 && \
+    ./configure -prefix=/usr/local && make -j8 && make install && cd ../ && rm sqlite-autoconf-3250300.tar.gz && \
+    wget -q https://www.python.org/ftp/python/3.6.0/Python-3.6.0.tgz && \
+    tar -xzf Python-3.6.0.tgz && cd Python-3.6.0 && \
+    CFLAGS="-Wformat" ./configure --prefix=/usr/local/ --enable-shared > /dev/null && \
+    make -j8 > /dev/null && make altinstall > /dev/null
+
+# Install Python3.7
+RUN wget -q https://www.python.org/ftp/python/3.7.0/Python-3.7.0.tgz && \
+    tar -xzf Python-3.7.0.tgz && cd Python-3.7.0 && \
+    CFLAGS="-Wformat" ./configure --prefix=/usr/local/ --enable-shared > /dev/null && \
+    make -j8 > /dev/null && make altinstall > /dev/null
+
 RUN apt-get update && \
     apt-get install -y --allow-downgrades patchelf \
     python3 python3-dev python3-pip \

@@ -74,6 +95,12 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
 RUN pip3 install -U wheel && \
     pip3 install -U docopt PyYAML sphinx==1.5.6 && \
     pip3 install sphinx-rtd-theme==0.1.9 recommonmark && \
+    pip3.6 install -U wheel && \
+    pip3.6 install -U docopt PyYAML sphinx==1.5.6 && \
+    pip3.6 install sphinx-rtd-theme==0.1.9 recommonmark && \
+    pip3.7 install -U wheel && \
+    pip3.7 install -U docopt PyYAML sphinx==1.5.6 && \
+    pip3.7 install sphinx-rtd-theme==0.1.9 recommonmark && \
     easy_install -U pip && \
     pip install -U pip setuptools wheel && \
     pip install -U docopt PyYAML sphinx==1.5.6 && \

@@ -82,22 +109,34 @@ RUN pip3 install -U wheel && \
 RUN pip3 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
     pip3 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
     pip3 install opencv-python && \
+    pip3.6 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
+    pip3.6 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip3.6 install opencv-python && \
+    pip3.7 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
+    pip3.7 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip3.7 install opencv-python && \
     pip install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
     pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
     pip install opencv-python
 
 #For docstring checker
 RUN pip3 install pylint pytest astroid isort
+RUN pip3.6 install pylint pytest astroid isort
+RUN pip3.7 install pylint pytest astroid isort
 RUN pip install pylint pytest astroid isort LinkChecker
 
 COPY ./python/requirements.txt /root/
 RUN pip3 install -r /root/requirements.txt
+RUN pip3.6 install -r /root/requirements.txt
+RUN pip3.7 install -r /root/requirements.txt
 RUN pip install -r /root/requirements.txt
 
 # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
 # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
 RUN apt-get install -y libssl-dev libffi-dev
 RUN pip3 install certifi urllib3[secure]
+RUN pip3.6 install certifi urllib3[secure]
+RUN pip3.7 install certifi urllib3[secure]
 RUN pip install certifi urllib3[secure]
 

cmake/operators.cmake
Lines changed: 2 additions & 1 deletion

@@ -109,7 +109,8 @@ function(op_library TARGET)
 
   # Define operators that don't need pybind here.
   foreach(manual_pybind_op "compare_op" "logical_op" "nccl_op"
-    "tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op")
+    "tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op"
+    "fusion_transpose_flatten_concat_op")
     if ("${TARGET}" STREQUAL "${manual_pybind_op}")
       set(pybind_flag 1)
     endif()

paddle/fluid/framework/CMakeLists.txt
Lines changed: 7 additions & 1 deletion

@@ -116,8 +116,14 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
 cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
 cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)
 
+if (NOT WIN32)
+cc_library(transfer_scope_cache SRCS transfer_scope_cache.cc DEPS scope framework_proto device_context)
+cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
+    shape_inference data_transform lod_tensor profiler transfer_scope_cache)
+else()
 cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
-    shape_inference data_transform lod_tensor profiler)
+    shape_inference data_transform lod_tensor)
+endif(NOT WIN32)
 
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context)

paddle/fluid/framework/executor.cc
Lines changed: 3 additions & 2 deletions

@@ -20,6 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/ngraph_operator.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/reader.h"
+#include "paddle/fluid/framework/transfer_scope_cache.h"
 #include "paddle/fluid/operators/detail/macros.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/profiler.h"

@@ -391,8 +392,8 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
 
   int64_t max_memory_size = GetEagerDeletionThreshold();
   std::unique_ptr<GarbageCollector<Tensor>> gc;
-  // WhileOp would set keep_kids to false
-  // WhileGradOp would need the scopes created in WhileOp
+  // WhileOp would set keep_kids to true,
+  // because WhileGradOp needs the scopes created in WhileOp.
   // Perhaps, we should not perform eager deletion in WhileOp
   // The scopes and variables created by WhileOp would be deleted
   // in WhileGradOp.
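
The corrected comment describes a lifetime contract: WhileOp runs with keep_kids set to true so that the child scopes it creates survive for WhileGradOp, which is then responsible for deleting them. A toy sketch of that contract, using stand-in types rather than Paddle's real Scope:

#include <iostream>
#include <memory>
#include <vector>

// Toy parent scope that can either drop or keep its children after a run.
struct ToyScope {
  std::vector<std::shared_ptr<ToyScope>> kids;
  std::shared_ptr<ToyScope> NewScope() {
    kids.push_back(std::make_shared<ToyScope>());
    return kids.back();
  }
  void DropKids() { kids.clear(); }
};

void RunForward(ToyScope* scope, bool keep_kids) {
  scope->NewScope();                  // the forward pass creates child scopes...
  if (!keep_kids) scope->DropKids();  // ...which are normally dropped afterwards
}

int main() {
  ToyScope scope;
  // WhileOp-style run: keep_kids == true, so children survive for the
  // backward pass (WhileGradOp), which deletes them when it finishes.
  RunForward(&scope, /*keep_kids=*/true);
  std::cout << "kids alive for backward: " << scope.kids.size() << "\n";
  scope.DropKids();  // what the backward pass effectively does at the end
  return 0;
}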

paddle/fluid/framework/naive_executor.cc
Lines changed: 1 addition & 0 deletions

@@ -83,6 +83,7 @@ void NaiveExecutor::Run() {
   for (auto &op : ops_) {
     VLOG(3) << std::this_thread::get_id() << " run " << op->Type()
             << " on scope " << scope_;
+    op->SetIsCalledByExecutor(false);
     op->Run(*scope_, place_);
   }
 }

paddle/fluid/framework/operator.cc
Lines changed: 17 additions & 32 deletions

@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/shape_inference.h"
+#include "paddle/fluid/framework/transfer_scope_cache.h"
 #include "paddle/fluid/framework/var_type.h"
 #include "paddle/fluid/platform/profiler.h"

@@ -33,11 +34,6 @@ DEFINE_bool(check_nan_inf, false,
 namespace paddle {
 namespace framework {
 
-// Combine two hash values to a single hash.
-inline size_t CombineHash(size_t seed, size_t a) {
-  return (seed ^ a) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-}
-
 std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority = {
     std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN),
     std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain),

@@ -797,17 +793,6 @@ void OperatorWithKernel::TransferInplaceVarsBack(
 Scope* OperatorWithKernel::TryTransferData(
     const Scope& scope, const OpKernelType& expected_kernel_key,
     std::vector<std::string>* transfered_inplace_vars) const {
-// In the inference scenerio, the scopes will be reused across the batches, so
-// the `new_scope` here will result in GPU memroy explosion over the running of
-// operators.
-// We use a thread_local cache to fix that issue, the key in the cache is the
-// combination of the `scope` argument, from_kernel_type, target_kernel_type.
-// Have a discussion with @Superjomn or the inference developers if some changes
-// on this logic for this macro might not tested on the other scenerios.
-#ifdef PADDLE_ON_INFERENCE
-  thread_local std::unordered_map<size_t, Scope*> infer_transfer_scope_cache;
-#endif
-
   Scope* new_scope = nullptr;
   for (auto& var_name_item : Inputs()) {
     for (auto& var_name : var_name_item.second) {

@@ -838,23 +823,23 @@ Scope* OperatorWithKernel::TryTransferData(
       VLOG(30) << "Transform Variable " << var_name << " from "
               << kernel_type_for_var << " to " << expected_kernel_key;
 
-#ifdef PADDLE_ON_INFERENCE
-      size_t infer_cache_key =
-          CombineHash(OpKernelType::Hash()(kernel_type_for_var),
-                      OpKernelType::Hash()(expected_kernel_key));
-      infer_cache_key =
-          CombineHash(infer_cache_key, std::hash<const Scope*>()(&scope));
-
-      auto it = infer_transfer_scope_cache.find(infer_cache_key);
-      if (it != infer_transfer_scope_cache.end()) {
-        new_scope = infer_transfer_scope_cache[infer_cache_key];
-      } else {
-        new_scope = &scope.NewScope();
-        infer_transfer_scope_cache[infer_cache_key] = new_scope;
+      // In the inference scenerio, the scopes will be reused across the
+      // batches, so the `new_scope` here will result in GPU memroy explosion
+      // over the running of operators.
+      // We use a thread_local cache to fix that issue, the key in the cache is
+      // the combination of the `scope` argument, from_kernel_type,
+      // target_kernel_type.
+      // Have a discussion with @Superjomn or the inference developers if some
+      // changes on this logic for this macro might not tested on the other
+      // scenerios.
+      // If this op is not called by an Executor or ParallelExecutor, it should
+      // called by a NaiveExecutor, the NaiveExecutor will cache the scopes and
+      // variables, that behavior a lot different.
+      if (!run_by_executor_) {
+        new_scope = TryCreateTransferScope(kernel_type_for_var,
+                                           expected_kernel_key, &scope);
       }
-#endif
-
-      if (new_scope == nullptr) {
+      if (!new_scope) {
         new_scope = &scope.NewScope();
       }

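The CombineHash helper deleted above is not gone: it moves into transfer_scope_cache.h (shown below) and is the familiar boost::hash_combine-style mixing step. As a quick illustration of how the cache key is built from two kernel-type hashes plus the scope address — with toy stand-in values, since OpKernelType and Scope are Paddle internals — consider this minimal sketch:

#include <cstdio>
#include <functional>

// Same mixing step as CombineHash in transfer_scope_cache.h:
// golden-ratio constant plus shifts, in the style of boost::hash_combine.
inline size_t CombineHash(size_t seed, size_t a) {
  return (seed ^ a) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

int main() {
  // Hypothetical stand-ins for OpKernelType::Hash()(kernel_type_for_var)
  // and OpKernelType::Hash()(expected_kernel_key).
  size_t from_kernel_hash = 0x1234;
  size_t to_kernel_hash = 0x5678;
  int scope_stand_in = 0;  // stands in for the execution Scope

  // key = mix(hash(from type), hash(to type)), then mix in the scope address:
  // the same recipe TryCreateTransferScope uses.
  size_t key = CombineHash(from_kernel_hash, to_kernel_hash);
  key = CombineHash(key, std::hash<const int*>()(&scope_stand_in));
  std::printf("cache key: %zx\n", key);
  return 0;
}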
paddle/fluid/framework/operator.h
Lines changed: 4 additions & 0 deletions

@@ -127,6 +127,8 @@ class OperatorBase {
   //! Get all outputs variable names
   virtual std::vector<std::string> OutputVars(bool has_intermediate) const;
 
+  void SetIsCalledByExecutor(bool x) { run_by_executor_ = x; }
+
  protected:
   std::string type_;
   // NOTE: in case of OpGrad, inputs_ contains:

@@ -139,6 +141,8 @@ class OperatorBase {
   // IG (Inputs Gradients)
   VariableNameMap outputs_;
   AttributeMap attrs_;
+  // Whether this operator executes in an Executor.
+  bool run_by_executor_{true};
 
  private:
   void GenerateTemporaryNames();
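
Read together with the NaiveExecutor change above, the new member works as a simple opt-out flag: it defaults to true, Executor and ParallelExecutor leave it untouched, and NaiveExecutor flips it to false before each Run() so that TryTransferData takes the cached-transfer-scope path. A stripped-down sketch of this pattern, with toy types rather than Paddle's API:

#include <iostream>

// Minimal stand-in for OperatorBase: a default-true flag with a setter,
// consulted at run time to choose between two scope-handling strategies.
class ToyOp {
 public:
  void SetIsCalledByExecutor(bool x) { run_by_executor_ = x; }

  void Run() const {
    if (!run_by_executor_) {
      // NaiveExecutor path: look up and reuse a cached transfer scope.
      std::cout << "reuse cached transfer scope\n";
    } else {
      // Executor / ParallelExecutor path: always create a fresh scope.
      std::cout << "create a fresh transfer scope\n";
    }
  }

 private:
  bool run_by_executor_{true};  // same default as the real member
};

int main() {
  ToyOp op;
  op.Run();                         // default: fresh scope
  op.SetIsCalledByExecutor(false);  // what NaiveExecutor::Run() does per op
  op.Run();                         // now: cached scope
  return 0;
}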
paddle/fluid/framework/transfer_scope_cache.cc (new file)
Lines changed: 62 additions & 0 deletions

@@ -0,0 +1,62 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/transfer_scope_cache.h"
+
+namespace paddle {
+namespace framework {
+
+// Holds all the transfer scope across the process.
+std::unordered_map<size_t, Scope*>& global_transfer_data_cache() {
+  typedef std::unordered_map<size_t, Scope*> map_t;
+  thread_local std::unique_ptr<map_t> x(new map_t);
+  return *x;
+}
+
+// Holds all the transfer scope for this thread.
+std::unordered_set<Scope*>& global_transfer_scope_cache() {
+  typedef std::unordered_set<Scope*> set_t;
+  thread_local std::unique_ptr<set_t> x(new set_t);
+  return *x;
+}
+
+// Try to create a transfer scope. If one cached scope has match the
+// requirement, just return that one.
+// Inputs:
+// @type0: the source kernel type.
+// @type1: the target kernel type.
+// @scope: the execution scope of this op.
+// Returns: A scope used to hold the transfer data across the different kernel
+// type.
+Scope* TryCreateTransferScope(OpKernelType type0, OpKernelType type1,
+                              const Scope* scope) {
+  Scope* new_scope{nullptr};
+  size_t infer_cache_key =
+      CombineHash(OpKernelType::Hash()(type0), OpKernelType::Hash()(type1));
+  infer_cache_key =
+      CombineHash(infer_cache_key, std::hash<const Scope*>()(scope));
+
+  auto it = global_transfer_data_cache().find(infer_cache_key);
+  if (it != global_transfer_data_cache().end()) {
+    new_scope = global_transfer_data_cache()[infer_cache_key];
+  } else {
+    new_scope = &scope->NewScope();
+    global_transfer_data_cache()[infer_cache_key] = new_scope;
+  }
+  global_transfer_scope_cache().insert(new_scope);
+  return new_scope;
+}
+
+}  // namespace framework
+}  // namespace paddle
paddle/fluid/framework/transfer_scope_cache.h (new file)
Lines changed: 41 additions & 0 deletions

@@ -0,0 +1,41 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <thread>  // NOLINT
+#include <unordered_map>
+#include <unordered_set>
+#include "paddle/fluid/framework/op_kernel_type.h"
+#include "paddle/fluid/framework/scope.h"
+
+namespace paddle {
+namespace framework {
+
+std::unordered_map<size_t, Scope*>& global_transfer_data_cache();
+
+std::unordered_set<Scope*>& global_transfer_scope_cache();
+
+// Combine two hash values to a single hash.
+static size_t CombineHash(size_t seed, size_t a) {
+  return (seed ^ a) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+}
+
+Scope* TryCreateTransferScope(OpKernelType type0, OpKernelType type1,
+                              const Scope* scope);
+
+void RemoveKidsFromTransferScopeCache(Scope* scope);
+
+}  // namespace framework
+}  // namespace paddle
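
To see the whole caching scheme in isolation, here is a self-contained sketch of a thread_local map keyed by CombineHash, mirroring global_transfer_data_cache() and TryCreateTransferScope() above with a toy Scope type; every name in it is an illustrative stand-in, not Paddle's API:

#include <functional>
#include <iostream>
#include <memory>
#include <unordered_map>

struct ToyScope {};  // stand-in for framework::Scope

inline size_t CombineHash(size_t seed, size_t a) {
  return (seed ^ a) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

// One map per thread, as in global_transfer_data_cache(); thread_local
// storage avoids locking and cross-thread sharing of scopes.
std::unordered_map<size_t, std::shared_ptr<ToyScope>>& cache() {
  thread_local std::unordered_map<size_t, std::shared_ptr<ToyScope>> x;
  return x;
}

// Mirrors TryCreateTransferScope: key = mix(from-type hash, to-type hash,
// scope address); return the cached scope for that key, else create one.
std::shared_ptr<ToyScope> GetOrCreateTransferScope(size_t from_hash,
                                                   size_t to_hash,
                                                   const ToyScope* scope) {
  size_t key = CombineHash(from_hash, to_hash);
  key = CombineHash(key, std::hash<const ToyScope*>()(scope));
  auto it = cache().find(key);
  if (it != cache().end()) {
    return it->second;  // reused across batches: no fresh allocation
  }
  auto fresh = std::make_shared<ToyScope>();
  cache()[key] = fresh;
  return fresh;
}

int main() {
  ToyScope exec_scope;
  auto a = GetOrCreateTransferScope(1, 2, &exec_scope);
  auto b = GetOrCreateTransferScope(1, 2, &exec_scope);
  // Same (from type, to type, scope) triple -> same transfer scope object,
  // which is what prevents per-batch scope growth in inference.
  std::cout << std::boolalpha << (a == b) << "\n";  // prints: true
  return 0;
}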

paddle/fluid/inference/CMakeLists.txt
Lines changed: 1 addition & 0 deletions

@@ -4,6 +4,7 @@ endif()
 # analysis and tensorrt must be added before creating static library,
 # otherwise, there would be undefined reference to them in static library.
 add_subdirectory(analysis)
+add_subdirectory(utils)
 if (TENSORRT_FOUND)
   add_subdirectory(tensorrt)
 endif()
