Commit 5b50307

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into update-api-reference-1
2 parents 6ace04f + 5ea039b

File tree

16 files changed: +280 −124 lines


paddle/fluid/inference/tensorrt/convert/op_converter.h

Lines changed: 2 additions & 1 deletion
@@ -64,7 +64,8 @@ class OpConverter {
     (*it)(op, scope, test_mode);
   }
 
-  // convert fluid block to tensorrt network
+  // Convert a fluid block to a TensorRT network. NOTE: this only converts the
+  // operators; the INetwork's inputs and outputs must be specified elsewhere.
   void ConvertBlock(const framework::proto::BlockDesc& block,
                     const std::unordered_set<std::string>& parameters,
                     const framework::Scope& scope, TensorRTEngine* engine) {

paddle/fluid/inference/tensorrt/engine.h

Lines changed: 23 additions & 9 deletions
@@ -51,11 +51,12 @@ class TensorRTEngine : public EngineBase {
     nvinfer1::Weights w_;
   };
 
-  TensorRTEngine(int max_batch, int max_workspace, cudaStream_t* stream,
+  TensorRTEngine(int max_batch, int max_workspace,
+                 cudaStream_t* stream = nullptr,
                  nvinfer1::ILogger& logger = NaiveLogger::Global())
       : max_batch_(max_batch),
         max_workspace_(max_workspace),
-        stream_(stream),
+        stream_(stream ? stream : &default_stream_),
         logger_(logger) {}
 
   virtual ~TensorRTEngine();

@@ -121,6 +122,8 @@ class TensorRTEngine : public EngineBase {
   // the max memory size the engine uses
   int max_workspace_;
   cudaStream_t* stream_;
+  // If stream_ is not set from outside, hold its own stream.
+  cudaStream_t default_stream_;
   nvinfer1::ILogger& logger_;
 
   std::vector<Buffer> buffers_;

@@ -165,20 +168,31 @@ class TensorRTEngine : public EngineBase {
  */
 class TRT_EngineManager {
  public:
-  TensorRTEngine* Create(int max_batch, int max_workspace,
-                         cudaStream_t* stream) {
-    engines_.emplace_back(new TensorRTEngine(max_batch, max_workspace, stream));
-    return engines_.back().get();
+  bool HasEngine(const std::string& name) const {
+    return engines_.count(name) != 0;
+  }
+
+  // Get an engine called `name`.
+  TensorRTEngine* Get(const std::string& name) const {
+    return engines_.at(name).get();
+  }
+
+  // Create or get an engine called `name`.
+  TensorRTEngine* Create(int max_batch, int max_workspace, cudaStream_t* stream,
+                         const std::string& name) {
+    auto* p = new TensorRTEngine(max_batch, max_workspace, stream);
+    engines_[name].reset(p);
+    return p;
   }
 
   void DeleteALl() {
-    for (auto& ptr : engines_) {
-      ptr.reset(nullptr);
+    for (auto& item : engines_) {
+      item.second.reset(nullptr);
     }
   }
 
  private:
-  std::vector<std::unique_ptr<TensorRTEngine>> engines_;
+  std::unordered_map<std::string, std::unique_ptr<TensorRTEngine>> engines_;
 };
 
 }  // namespace tensorrt
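
Taken together, these engine.h changes replace the anonymous engine list with a name-keyed registry and let an engine fall back to an internally held stream when none is passed in. A minimal usage sketch of the new interface (the key "subgraph_0" and the size values are illustrative, not taken from this commit):

  using paddle::inference::Singleton;
  using paddle::inference::tensorrt::TRT_EngineManager;
  using paddle::inference::tensorrt::TensorRTEngine;

  auto& manager = Singleton<TRT_EngineManager>::Global();
  if (!manager.HasEngine("subgraph_0")) {
    // Passing nullptr for the stream makes the engine hold default_stream_.
    manager.Create(/*max_batch=*/8, /*max_workspace=*/1 << 20,
                   /*stream=*/nullptr, "subgraph_0");
  }
  TensorRTEngine* engine = manager.Get("subgraph_0");  // cached lookup by key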

paddle/fluid/operators/activation_op.cc

Lines changed: 8 additions & 9 deletions
@@ -252,15 +252,14 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput("Out", "Output of Softshrink operator");
     AddAttr<float>("lambda", "non-negative offset").SetDefault(0.5f);
     AddComment(R"DOC(
-Softshrink Activation Operator.
-
-$$
-out = \begin{cases}
-    x - \lambda, \text{if } x > \lambda \\
-    x + \lambda, \text{if } x < -\lambda \\
-    0, \text{otherwise}
-    \end{cases}
-$$
+:strong:`Softshrink Activation Operator`
+
+..  math::
+    out = \begin{cases}
+        x - \lambda, \text{if } x > \lambda \\
+        x + \lambda, \text{if } x < -\lambda \\
+        0, \text{otherwise}
+    \end{cases}
 
 )DOC");
   }
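
For the default lambda = 0.5, a quick worked check of the three cases in the formula above (values computed by hand, not part of the commit):

..  math::
    softshrink(1.2) = 1.2 - 0.5 = 0.7, \quad
    softshrink(-0.3) = 0, \quad
    softshrink(-2.0) = -2.0 + 0.5 = -1.5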

paddle/fluid/operators/listen_and_serv_op.cc

Lines changed: 2 additions & 1 deletion
@@ -348,7 +348,8 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker {
 };
 
 void SignalHandler::StopAndExit(int signal_num) {
-  VLOG(3) << "Catch interrupt signal: " << signal_num << ", program will exit";
+  // Do not use VLOG here: the device used for printing may already have been
+  // released. exit() will release internally allocated resources.
   exit(0);
 }

paddle/fluid/operators/mean_op.cc

Lines changed: 3 additions & 5 deletions
@@ -33,12 +33,10 @@ class MeanOp : public framework::OperatorWithKernel {
 class MeanOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
-    AddInput("X", "The input of mean op");
-    AddOutput("Out", "The output of mean op").Reuse("X");
+    AddInput("X", "(Tensor) The input of mean op");
+    AddOutput("Out", "(Tensor) The output of mean op").Reuse("X");
     AddComment(R"DOC(
-Mean Operator.
-
-Out is a scalar which is the mean of all elements in X.
+Mean Operator calculates the mean of all elements in X.
 
 )DOC");
   }

paddle/fluid/operators/tensorrt_engine_op.cc

Lines changed: 18 additions & 10 deletions
@@ -66,17 +66,25 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) {
 }  // namespace
 
 template <typename DeviceContext, typename T>
-void paddle::operators::TensorRTEngineKernel<DeviceContext, T>::Prepare(
+void TensorRTEngineKernel<DeviceContext, T>::Prepare(
     const framework::ExecutionContext &context) const {
   VLOG(4) << "Prepare engine";
   // Get the ProgramDesc and pass to convert.
   framework::proto::BlockDesc block_desc;
   block_desc.ParseFromString(context.Attr<std::string>("subgraph"));
-  max_batch_ = context.Attr<int>("max_batch");
+  int max_batch = context.Attr<int>("max_batch");
   auto max_workspace = context.Attr<int>("max_workspace");
-  engine_ = Singleton<TRT_EngineManager>::Global().Create(
-      max_batch_, max_workspace, &stream_);
-  engine_->InitNetwork();
+  auto params = context.Attr<std::vector<std::string>>("parameters");
+  std::unordered_set<std::string> parameters;
+  for (const auto &param : params) {
+    parameters.insert(param);
+  }
+
+  // TODO(Superjomn) replace this with a different stream
+  auto *engine = Singleton<TRT_EngineManager>::Global().Create(
+      max_batch, max_workspace, nullptr /*engine holds its own stream*/,
+      context.Attr<std::string>("engine_uniq_key"));
+  engine->InitNetwork();
 
   framework::BlockDesc block(nullptr /*programdesc*/, &block_desc);
   // Add inputs

@@ -87,24 +95,23 @@ void paddle::operators::TensorRTEngineKernel<DeviceContext, T>::Prepare(
     PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
                       "TensorRT engine only takes LoDTensor as input");
     auto shape = var->GetShape();
-    engine_->DeclareInput(
+    engine->DeclareInput(
         input, FluidDataType2TRT(
                    var->Proto()->type().lod_tensor().tensor().data_type()),
         Vec2TRT_Dims(var->GetShape()));
   }
 
-  // TODO(Superjomn) parameters should be passed after analysised from outside.
   inference::Singleton<inference::tensorrt::OpConverter>::Global().ConvertBlock(
-      block_desc, {}, context.scope(), engine_);
+      block_desc, parameters, context.scope(), engine);
 
   // Add outputs
   VLOG(4) << "declare outputs";
   for (auto &output : context.Outputs("Ys")) {
     VLOG(4) << "declare output " << output;
-    engine_->DeclareOutput(output);
+    engine->DeclareOutput(output);
   }
 
-  engine_->FreezeNetwork();
+  engine->FreezeNetwork();
 }
 
 class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {

@@ -113,6 +120,7 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
   AddInput("Xs", "A list of inputs.").AsDuplicable();
   AddOutput("Ys", "A list of outputs").AsDuplicable();
   AddAttr<std::string>("subgraph", "the subgraph.");
+  AddAttr<std::string>("engine_uniq_key", "unique key for the TRT engine.");
   AddAttr<int>("max_batch", "the maximum batch size.");
   AddAttr<int>("max_workspace", "the maximum workspace size.");
   AddComment("TensorRT engine operator.");
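
The new engine_uniq_key attribute is what lets the kernel's Compute find the engine that Prepare registered under the same name. A hedged sketch of how a caller building this op might fill in the attributes (the key, the sizes, and the block_proto variable are hypothetical, not from this commit):

  framework::OpDesc desc;
  desc.SetType("tensorrt_engine");
  desc.SetAttr("subgraph", block_proto);  // hypothetical serialized BlockDesc string
  desc.SetAttr("engine_uniq_key", std::string("subgraph_0"));  // registry key
  desc.SetAttr("max_batch", 8);
  desc.SetAttr("max_workspace", 1 << 20);
  desc.SetAttr("parameters", std::vector<std::string>{});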

paddle/fluid/operators/tensorrt_engine_op.h

Lines changed: 17 additions & 16 deletions
@@ -19,10 +19,14 @@
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/inference/analysis/helper.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
 
 namespace paddle {
 namespace operators {
 
+using inference::Singleton;
+using inference::tensorrt::TRT_EngineManager;
+
 class TensorRTEngineOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;

@@ -47,37 +51,39 @@ template <typename DeviceContext, typename T>
 class TensorRTEngineKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    if (!engine_) {
+    auto engine_name = context.Attr<std::string>("engine_uniq_key");
+    if (!Singleton<TRT_EngineManager>::Global().HasEngine(engine_name)) {
       Prepare(context);
     }
+    auto* engine = Singleton<TRT_EngineManager>::Global().Get(engine_name);
     auto input_names = context.op().Inputs("Xs");
     PADDLE_ENFORCE(!input_names.empty(), "should pass more than one inputs");
     // Try to determine a batch_size
     auto& tensor0 = inference::analysis::GetFromScope<framework::LoDTensor>(
         context.scope(), input_names.front());
     int batch_size = tensor0.dims()[0];
-    PADDLE_ENFORCE_LE(batch_size, max_batch_);
+    PADDLE_ENFORCE_LE(batch_size, context.Attr<int>("max_batch"));
 
     // Convert input tensor from fluid to engine.
     for (const auto& x : context.Inputs("Xs")) {
       // convert input and copy to TRT engine's buffer
       auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(
          context.scope(), x);
       if (platform::is_cpu_place(t.place())) {
-        engine_->SetInputFromCPU(x, static_cast<const void*>(t.data<void>()),
-                                 t.memory_size());
+        engine->SetInputFromCPU(x, static_cast<const void*>(t.data<void>()),
+                                t.memory_size());
       } else {
-        engine_->SetInputFromGPU(x, static_cast<const void*>(t.data<void>()),
-                                 t.memory_size());
+        engine->SetInputFromGPU(x, static_cast<const void*>(t.data<void>()),
+                                t.memory_size());
       }
     }
     // Execute the engine.
     PADDLE_ENFORCE_GT(batch_size, 0);
-    engine_->Execute(batch_size);
+    engine->Execute(batch_size);
     // Convert output tensor from engine to fluid
     for (const auto& y : context.Outputs("Ys")) {
       // convert output and copy to fluid.
-      nvinfer1::ITensor* trt_t = engine_->GetITensor(y);
+      nvinfer1::ITensor* trt_t = engine->GetITensor(y);
       auto dims = trt_t->getDimensions();
       // Use the output ITensor's dims to reshape the Fluid Tensor.
       std::vector<int> ddim(dims.d, dims.d + dims.nbDims);

@@ -89,27 +95,22 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
       auto size = inference::analysis::AccuDims(dims.d, dims.nbDims);
       if (platform::is_cpu_place(fluid_t->place())) {
         // TODO(Superjomn) change this float to dtype size.
-        engine_->GetOutputInCPU(
+        engine->GetOutputInCPU(
             y, fluid_t->mutable_data<float>(platform::CPUPlace()),
             size * sizeof(float));
       } else {
-        engine_->GetOutputInGPU(
+        engine->GetOutputInGPU(
            y, fluid_t->mutable_data<float>(platform::CUDAPlace()),
            size * sizeof(float));
       }
     }
 
-    cudaStreamSynchronize(stream_);
+    cudaStreamSynchronize(*engine->stream());
   }
 
  protected:
   // Build the engine.
   void Prepare(const framework::ExecutionContext& context) const;
-
- private:
-  mutable cudaStream_t stream_;
-  mutable inference::tensorrt::TensorRTEngine* engine_{nullptr};
-  mutable int max_batch_{0};
 };
 
 }  // namespace operators
