
Commit ee74be3

[1.1] Bugfix/tensorarray (#14044)
1 parent 33b4920 · commit ee74be3

16 files changed: +225 −19 lines

CMakeLists.txt

Lines changed: 6 additions & 0 deletions

@@ -69,6 +69,7 @@ option(WITH_ANAKIN "Compile with Anakin library" OFF)
 option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
 option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
 option(WITH_INFERENCE "Compile fluid inference library" ON)
+option(ON_INFER "Turn on inference optimization." OFF)
 option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF)
 option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
 option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})

@@ -302,3 +303,8 @@ if(WITH_DOC)
   find_python_module(recommonmark REQUIRED)
   add_subdirectory(doc)
 endif()
+
+if (ON_INFER)
+  message(WARNING "Inference mode is on; inference-specific optimizations will be applied.")
+  add_definitions(-DPADDLE_ON_INFERENCE)
+endif()
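The new ON_INFER switch (enabled by configuring with cmake -DON_INFER=ON) only emits a warning and defines PADDLE_ON_INFERENCE; C++ sources then branch on that macro at compile time, as the lod_tensor_array.h change below does. A minimal sketch of the guard pattern:

// Sketch of the compile-time guard introduced by this commit: ON_INFER=ON
// makes cmake add -DPADDLE_ON_INFERENCE, and sources select an
// implementation with it.
#if !defined(PADDLE_ON_INFERENCE)
// default (training-friendly) definitions
#else
// inference-optimized definitions
#endif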

cmake/inference_lib.cmake

Lines changed: 3 additions & 0 deletions

@@ -14,6 +14,9 @@

 # make package for paddle fluid shared and static library
 function(copy TARGET)
+  if (NOT ON_INFER)
+    message(WARNING "Turn on the ON_INFER flag when building only the inference library.")
+  endif()
   set(options "")
   set(oneValueArgs "")
   set(multiValueArgs SRCS DSTS DEPS)

paddle/fluid/framework/lod_tensor_array.h

Lines changed: 77 additions & 1 deletion

@@ -18,6 +18,82 @@ limitations under the License. */

 namespace paddle {
 namespace framework {
+
+// NOTE The vector<LoDTensor> can't be replaced with the class LoDTensorArray
+// directly, because many vector<LoDTensor> instances are used across the
+// project, and some of them are treated as LoDTensorArray.
+#if !defined(PADDLE_ON_INFERENCE)
+
 using LoDTensorArray = std::vector<LoDTensor>;
-}
+
+#else  // !PADDLE_ON_INFERENCE
+
+#pragma message "LoDTensorArray is replaced with the inference one."
+/*
+ * A LoDTensorArray that does not deallocate its buffer when resized; it fixes
+ * the data diff in inference and is more performance-friendly in concurrency
+ * scenarios.
+ */
+class LoDTensorArray {
+ public:
+  LoDTensorArray() = default;
+
+  using iterator = std::vector<LoDTensor>::iterator;
+  using const_iterator = std::vector<LoDTensor>::const_iterator;
+
+  const_iterator begin() const { return array_.begin(); }
+  const_iterator end() const { return array_.begin() + size_; }
+  iterator begin() { return array_.begin(); }
+  iterator end() { return array_.begin() + size_; }
+
+  void push_back(const LoDTensor& x) {
+    if (size_ < array_.size()) {
+      array_[size_++] = x;
+    } else {
+      array_.push_back(x);
+      ++size_;
+    }
+  }
+  void resize(size_t size) {
+    if (array_.size() < size) {
+      array_.resize(size);
+    }
+    size_ = size;
+  }
+
+  void emplace_back() { array_.emplace_back(); }
+
+  void emplace_back(LoDTensor&& x) { array_.emplace_back(std::move(x)); }
+
+  LoDTensor& back() { return array_.back(); }
+
+  size_t space() const { return array_.size(); }
+
+  void reserve(size_t size) {
+    // Naive warning to tell the user that this array might be too large. The
+    // memory and buffers used by this TensorArray are not deleted during
+    // training or inference, so take care not to let it grow too large.
+    if (size > 800UL) {
+      LOG(WARNING) << "TensorArray has more than 800 items";
+    }
+    array_.reserve(size);
+  }
+
+  bool empty() const { return size_ == 0UL; }
+  void clear() { size_ = 0UL; }
+
+  LoDTensor& operator[](size_t id) { return array_[id]; }
+  const LoDTensor& operator[](size_t id) const { return array_[id]; }
+  LoDTensor& at(size_t id) { return array_.at(id); }
+  const LoDTensor& at(size_t id) const { return array_.at(id); }
+
+  size_t size() const { return size_; }
+
+ private:
+  size_t size_{0};
+  std::vector<LoDTensor> array_;
+};
+#endif  // !PADDLE_ON_INFERENCE
+
+}  // namespace framework
 }  // namespace paddle
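The reuse semantics above are the heart of the fix: clear() only rewinds the logical size, so buffers allocated for one batch survive into the next. A minimal illustration (hypothetical, not part of the commit; assumes PADDLE_ON_INFERENCE is defined so the class above is in effect):

#include "paddle/fluid/framework/lod_tensor_array.h"

// Hypothetical usage sketch: clear() resets size() but keeps capacity, so a
// push_back() after clear() reuses an existing slot instead of reallocating.
void Demo(paddle::framework::LoDTensorArray* arr) {
  arr->push_back(paddle::framework::LoDTensor());  // size() == 1, space() == 1
  arr->push_back(paddle::framework::LoDTensor());  // size() == 2, space() == 2
  arr->clear();  // size() == 0, but space() is still 2: nothing deallocated
  arr->push_back(paddle::framework::LoDTensor());  // overwrites slot 0, no realloc
}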

paddle/fluid/framework/scope.h

Lines changed: 2 additions & 0 deletions

@@ -78,6 +78,8 @@ class Scope {
   /// Drop all kids scopes belonged to this scope.
   void DropKids();

+  std::list<Scope*>& kids() const { return kids_; }
+
   /// Find if a scope exists in the kid scopes
   bool HasKid(const Scope* scope) const;
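The new kids() accessor exposes the child-scope list, presumably so the TensorArray cleaner can walk the whole scope tree after each batch. A hypothetical traversal sketch (not from the commit):

#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/scope.h"

// Hypothetical sketch: recursively visit a scope and its kid scopes, picking
// out variables that hold a LoDTensorArray.
void VisitScope(const paddle::framework::Scope& scope) {
  for (const auto& name : scope.LocalVarNames()) {
    auto* var = scope.FindVar(name);
    if (var != nullptr && var->IsType<paddle::framework::LoDTensorArray>()) {
      // remember `var` so its TensorArray can be reset after the batch
    }
  }
  for (auto* kid : scope.kids()) {  // the accessor added above
    VisitScope(*kid);
  }
}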

paddle/fluid/inference/CMakeLists.txt

Lines changed: 2 additions & 2 deletions

@@ -30,7 +30,7 @@ if (WITH_GPU AND TENSORRT_FOUND)
 endif()

 # Create static library
-cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor)
+cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array)

 if(NOT APPLE)
   # TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.

@@ -40,7 +40,7 @@ endif()

 # Create shared library
 cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
-  DEPS ${fluid_modules} paddle_fluid_api)
+  DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array)

 set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
 if(NOT APPLE)

paddle/fluid/inference/api/CMakeLists.txt

Lines changed: 15 additions & 6 deletions

@@ -18,7 +18,8 @@ if(APPLE)
 endif(APPLE)


-set(inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager naive_executor ${GLOB_PASS_LIB})
+set(inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager naive_executor ${GLOB_PASS_LIB}
+    )

 if(WITH_GPU AND TENSORRT_FOUND)
   set(inference_deps ${inference_deps} paddle_inference_tensorrt_subgraph_engine analysis_predictor)

@@ -31,18 +32,26 @@ function(inference_api_test TARGET_NAME)
     set(multiValueArgs ARGS)
     cmake_parse_arguments(inference_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

-    cc_test(${TARGET_NAME}
-            SRCS ${inference_test_SRC}
-            DEPS "${inference_deps}"
-            ARGS --dirname=${PYTHON_TESTS_DIR}/book/)
+    if (WITH_GPU)
+      cc_test(${TARGET_NAME}
+              SRCS ${inference_test_SRC}
+              DEPS "${inference_deps}"
+              ARGS --dirname=${PYTHON_TESTS_DIR}/book/ --fraction_of_gpu_memory_to_use=0.15)
+    else()
+      cc_test(${TARGET_NAME}
+              SRCS ${inference_test_SRC}
+              DEPS "${inference_deps}"
+              ARGS --dirname=${PYTHON_TESTS_DIR}/book/)
+    endif()
     if(inference_test_ARGS)
       set_tests_properties(${TARGET_NAME}
                            PROPERTIES DEPENDS "${inference_test_ARGS}")
     endif()
   endif(WITH_TESTING)
 endfunction(inference_api_test)

-cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope)
+cc_library(reset_tensor_array SRCS details/reset_tensor_array.cc DEPS lod_tensor scope)
+cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS reset_tensor_array lod_tensor scope)
 cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor)
 cc_library(zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api)
 cc_library(zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc DEPS paddle_inference_api)

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 8 additions & 0 deletions

@@ -82,6 +82,7 @@ bool AnalysisPredictor::Init(

   // Get the feed_target_names and fetch_target_names
   PrepareFeedFetch();
+
   return true;
 }

@@ -109,6 +110,10 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
     return false;
   }
   VLOG(3) << "predict cost: " << timer.toc() << "ms";
+
+  // Fix the bug that reused TensorArrays were not cleaned between runs.
+  tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
+  tensor_array_batch_cleaner_.ResetTensorArray();
   return true;
 }

@@ -322,6 +327,9 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(

 bool AnalysisPredictor::ZeroCopyRun() {
   executor_->Run();
+  // Fix the bug that reused TensorArrays were not cleaned between runs.
+  tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
+  tensor_array_batch_cleaner_.ResetTensorArray();
   return true;
 }
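The cleaner itself lives in the new details/reset_tensor_array.{h,cc}, which is among the 16 changed files but not shown on this page. Inferred from the call sites above, its interface plausibly looks like the following sketch (an assumption, not the committed header):

#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/scope.h"

// Hypothetical interface, inferred from the two call sites above.
namespace paddle {
namespace details {

class TensorArrayBatchCleaner {
 public:
  // Walk the scope tree once and remember every variable that holds a
  // framework::LoDTensorArray.
  void CollectTensorArrays(framework::Scope* scope);

  // Reset each collected array's logical size to zero so the next batch
  // starts empty, while the underlying buffers stay allocated for reuse.
  void ResetTensorArray();
};

}  // namespace details
}  // namespace paddle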

paddle/fluid/inference/api/analysis_predictor.h

Lines changed: 2 additions & 0 deletions

@@ -18,6 +18,7 @@
 #include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/inference/analysis/analyzer.h"
 #include "paddle/fluid/inference/api/api_impl.h"
+#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/string/printf.h"

@@ -88,6 +89,7 @@ class AnalysisPredictor : public PaddlePredictor {
   // Memory buffer for feed inputs. The temporary LoDTensor will cause serious
   // concurrency problems, so cache them.
   std::vector<framework::LoDTensor> feed_tensors_;
+  details::TensorArrayBatchCleaner tensor_array_batch_cleaner_;
 };

 }  // namespace paddle

paddle/fluid/inference/api/api_impl.cc

Lines changed: 5 additions & 0 deletions

@@ -22,6 +22,7 @@ limitations under the License. */

 #include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/inference/api/api_impl.h"
+#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
 #include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/platform/cpu_helper.h"
 #include "paddle/fluid/platform/profiler.h"

@@ -157,6 +158,10 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
     return false;
   }
   VLOG(3) << "predict cost: " << timer.toc() << "ms";
+
+  // Fix the bug that reused TensorArrays were not cleaned between runs.
+  tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
+  tensor_array_batch_cleaner_.ResetTensorArray();
   return true;
 }
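With the cleaner wired into both predictors, repeated Run() calls on one predictor no longer see stale TensorArray contents from the previous batch. A minimal sketch of that scenario against the public API (the model path is a placeholder):

#include <vector>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

// Sketch of the scenario this commit fixes: before the fix, TensorArrays
// written during the first Run() kept their contents, so a second Run() on
// the same predictor could read stale data.
int main() {
  paddle::NativeConfig config;
  config.model_dir = "/path/to/model";  // placeholder

  auto predictor = paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);

  std::vector<paddle::PaddleTensor> inputs, outputs;
  // ... fill `inputs` for batch 1 ...
  predictor->Run(inputs, &outputs);  // TensorArrays are reset after this call

  // ... fill `inputs` for batch 2 ...
  predictor->Run(inputs, &outputs);  // no stale TensorArray state carried over
  return 0;
}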

paddle/fluid/inference/api/api_impl.h

Lines changed: 3 additions & 2 deletions

@@ -26,11 +26,11 @@ limitations under the License. */
 #include <string>
 #include <vector>

-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-
 #include "paddle/fluid/framework/ddim.h"
 #include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/lod_tensor_array.h"
 #include "paddle/fluid/framework/naive_executor.h"
+#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/io.h"
 #include "paddle/fluid/platform/init.h"

@@ -77,6 +77,7 @@ class NativePaddlePredictor : public PaddlePredictor {
   std::vector<framework::OpDesc *> fetchs_;
   // Do not use unique_ptr, use parent scope to delete
   framework::Scope *sub_scope_{nullptr};
+  details::TensorArrayBatchCleaner tensor_array_batch_cleaner_;
 };

 }  // namespace paddle
