paddle/fluid/inference/tensorrt — 2 files changed: 10 additions, 5 deletions

First changed file (TensorRTEngine implementation):

@@ -35,8 +35,15 @@ void TensorRTEngine::Build(const DescType &paddle_model) {
 void TensorRTEngine::Execute(int batch_size, std::vector<void *> *buffers,
                              cudaStream_t stream) {
   freshDeviceId();
+  const std::thread::id tid = std::this_thread::get_id();
   batch_size_ = batch_size;
-  infer_context_->enqueue(batch_size, buffers->data(), stream, nullptr);
+  if (infer_context_.find(tid) == infer_context_.end()) {
+    PADDLE_ENFORCE_NOT_NULL(
+        infer_engine_,
+        "You should build engine first and then set the context.");
+    infer_context_[tid].reset(infer_engine_->createExecutionContext());
+  }
+  infer_context_[tid]->enqueue(batch_size, buffers->data(), stream, nullptr);
   cudaStreamSynchronize(stream);
   SetRuntimeBatch(batch_size);
 }
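The new Execute() body is a lazy per-thread cache of TensorRT execution contexts: an nvinfer1::IExecutionContext is not safe to share across threads, so each calling thread gets its own context, created on its first call and reused afterwards. Below is a minimal standalone sketch of the same pattern; Engine, Context, and GetThreadLocalContext() are illustrative stand-ins rather than Paddle APIs, and the sketch adds a mutex around the map mutation (the diff itself inserts into the bare std::unordered_map, which is only safe if first calls from different threads never overlap).

```cpp
#include <memory>
#include <mutex>
#include <thread>
#include <unordered_map>

// Stand-in for nvinfer1::IExecutionContext.
struct Context {
  void Enqueue(int batch_size) { (void)batch_size; /* run inference here */ }
};

class Engine {
 public:
  void Execute(int batch_size) { GetThreadLocalContext()->Enqueue(batch_size); }

 private:
  Context *GetThreadLocalContext() {
    const std::thread::id tid = std::this_thread::get_id();
    std::lock_guard<std::mutex> lock(mu_);  // guard concurrent first-time inserts
    auto it = contexts_.find(tid);
    if (it == contexts_.end()) {
      // First Execute() on this thread: lazily create its private context.
      it = contexts_.emplace(tid, std::unique_ptr<Context>(new Context)).first;
    }
    return it->second.get();
  }

  std::mutex mu_;
  std::unordered_map<std::thread::id, std::unique_ptr<Context>> contexts_;
};

int main() {
  Engine engine;
  std::thread t1([&] { engine.Execute(1); });  // each thread ends up with
  std::thread t2([&] { engine.Execute(4); });  // its own private Context
  t1.join();
  t2.join();
}
```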
@@ -111,8 +118,6 @@ void TensorRTEngine::FreezeNetwork() {
   infer_engine_.reset(infer_builder_->buildCudaEngine(*infer_network_));
   PADDLE_ENFORCE(infer_engine_ != nullptr, "build cuda engine failed!");
-
-  infer_context_.reset(infer_engine_->createExecutionContext());
 }

 nvinfer1::ITensor *TensorRTEngine::DeclareInput(const std::string &name,
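With the Execute() change above, the execution context is no longer created eagerly when the engine is built: the createExecutionContext() call is dropped here, and the matching call in the header's deserializing constructor is dropped below, since each thread now creates its own context lazily on its first Execute().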
Second changed file (TensorRTEngine class declaration):

@@ -128,7 +128,6 @@ class TensorRTEngine {
         &inference::Singleton<plugin::PluginFactoryTensorRT>::Global()));
     PADDLE_ENFORCE(infer_engine_ != nullptr,
                    "build cuda engine failed when deserialize engine info.!");
-    infer_context_.reset(infer_engine_->createExecutionContext());
   }

   void SetRuntimeBatch(size_t batch_size);
@@ -200,7 +199,8 @@ class TensorRTEngine {
   infer_ptr<nvinfer1::IBuilder> infer_builder_;
   infer_ptr<nvinfer1::INetworkDefinition> infer_network_;
   infer_ptr<nvinfer1::ICudaEngine> infer_engine_;
-  infer_ptr<nvinfer1::IExecutionContext> infer_context_;
+  std::unordered_map<std::thread::id, infer_ptr<nvinfer1::IExecutionContext>>
+      infer_context_;
   infer_ptr<nvinfer1::IHostMemory> ihost_memory_;
   std::unordered_map<nvinfer1::ITensor*, float> quant_dynamic_range_;
 };  // class TensorRTEngine
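In the header, the single infer_ptr<nvinfer1::IExecutionContext> member becomes a map keyed by std::thread::id. One detail this relies on: Execute() populates the map with infer_context_[tid].reset(...), where operator[] default-constructs an empty (null) smart pointer on first access and reset() then adopts the freshly created context. A tiny sketch of that behavior, with std::unique_ptr standing in for infer_ptr (which, as I understand Paddle's code, is a unique_ptr alias whose deleter calls ->destroy()):

```cpp
#include <cassert>
#include <memory>
#include <thread>
#include <unordered_map>

int main() {
  std::unordered_map<std::thread::id, std::unique_ptr<int>> m;
  const std::thread::id tid = std::this_thread::get_id();

  assert(m.find(tid) == m.end());  // no entry for this thread yet
  m[tid].reset(new int(42));       // operator[] inserts a null ptr; reset() fills it
  assert(m.find(tid) != m.end() && *m[tid] == 42);
}
```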