@@ -49,9 +49,8 @@ size_t getDataSize(const std::vector<int64_t>& shape, ChannelType dataType) {
 
 std::error_code TensorRTInferencer::getLayerInfo(LayerInfo& layer, std::string layerName) {
   layer.name = layerName;
-  layer.index = m_inferenceEngine->getBindingIndex(layerName.c_str());
-  auto dim = m_inferenceEngine->getBindingDimensions(layer.index);
-  nvinfer1::TensorFormat tensorFormat = m_inferenceEngine->getBindingFormat(layer.index);
+  auto dim = m_inferenceEngine->getTensorShape(layer.name.c_str());
+  nvinfer1::TensorFormat tensorFormat = m_inferenceEngine->getTensorFormat(layer.name.c_str());
 
   std::error_code err;
   err = getCVCoreChannelLayoutFromTensorRT(layer.layout, tensorFormat);
@@ -64,7 +63,7 @@ std::error_code TensorRTInferencer::getLayerInfo(LayerInfo& layer, std::string l
   }
 
   err = getCVCoreChannelTypeFromTensorRT(layer.dataType,
-                                         m_inferenceEngine->getBindingDataType(layer.index));
+                                         m_inferenceEngine->getTensorDataType(layer.name.c_str()));
   layer.layerSize = getDataSize(layer.shape, layer.dataType);
   if (err != make_error_code(ErrorCode::SUCCESS)) {
     return ErrorCode::INVALID_ARGUMENT;
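
For context on the change above: TensorRT 8.5 deprecated the binding-index lookups (`getBindingIndex`, `getBindingDimensions`, `getBindingFormat`, `getBindingDataType`) in favor of name-based queries on `ICudaEngine`. A minimal sketch of the name-based calls, assuming a valid, already-deserialized engine; the helper function and the printed output are illustrative only, not part of this codebase:

```cpp
#include <cstdio>

#include "NvInfer.h"

// Sketch: name-based I/O tensor queries (TensorRT >= 8.5).
// `engine` is assumed to be a valid, already-deserialized engine.
void printTensorInfo(const nvinfer1::ICudaEngine& engine, const char* name) {
  nvinfer1::Dims dims = engine.getTensorShape(name);          // -1 marks dynamic dims
  nvinfer1::DataType type = engine.getTensorDataType(name);   // e.g. DataType::kFLOAT
  nvinfer1::TensorFormat fmt = engine.getTensorFormat(name);  // e.g. TensorFormat::kLINEAR
  std::printf("%s: rank=%d type=%d format=%d\n",
              name, dims.nbDims, static_cast<int>(type), static_cast<int>(fmt));
}
```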
@@ -174,16 +173,15 @@ std::error_code TensorRTInferencer::convertModelToEngine(int32_t dla_core,
   }
   builderConfig->addOptimizationProfile(optimization_profile);
 
-  // Creates TensorRT Engine Plan
-  std::unique_ptr<nvinfer1::ICudaEngine> engine(
-      builder->buildEngineWithConfig(*network, *builderConfig));
-  if (!engine) {
+  // Creates TensorRT model stream
+  std::unique_ptr<nvinfer1::IHostMemory> model_stream(
+      builder->buildSerializedNetwork(*network, *builderConfig));
+  if (!model_stream) {
     GXF_LOG_ERROR("Failed to build TensorRT engine from model %s.", model_file);
     return InferencerErrorCode::INVALID_ARGUMENT;
   }
 
-  std::unique_ptr<nvinfer1::IHostMemory> model_stream(engine->serialize());
-  if (!model_stream || model_stream->size() == 0 || model_stream->data() == nullptr) {
+  if (model_stream->size() == 0 || model_stream->data() == nullptr) {
     GXF_LOG_ERROR("Failed to serialize TensorRT engine.");
     return InferencerErrorCode::INVALID_ARGUMENT;
   }
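
The hunk above relies on `IBuilder::buildSerializedNetwork` (available since TensorRT 8.0), which returns the serialized plan directly and removes the need for an intermediate `ICudaEngine` plus a separate `serialize()` call. A minimal sketch, assuming the builder, network, config, and logger are already set up (the helper name `buildEngine` is hypothetical):

```cpp
#include <memory>

#include "NvInfer.h"

// Sketch: build a serialized plan and deserialize it in one pass (TRT >= 8.0).
std::unique_ptr<nvinfer1::ICudaEngine> buildEngine(
    nvinfer1::IBuilder& builder, nvinfer1::INetworkDefinition& network,
    nvinfer1::IBuilderConfig& config, nvinfer1::ILogger& logger) {
  std::unique_ptr<nvinfer1::IHostMemory> plan(
      builder.buildSerializedNetwork(network, config));
  if (!plan || plan->size() == 0) {
    return nullptr;  // build failed; the root cause is reported via the ILogger
  }
  // The plan bytes could also be written to disk and reloaded later.
  std::unique_ptr<nvinfer1::IRuntime> runtime(nvinfer1::createInferRuntime(logger));
  return std::unique_ptr<nvinfer1::ICudaEngine>(
      runtime->deserializeCudaEngine(plan->data(), plan->size()));
}
```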
@@ -284,13 +282,14 @@ TensorRTInferencer::TensorRTInferencer(const TensorRTInferenceParams& params)
   }
 
   m_hasImplicitBatch = m_inferenceEngine->hasImplicitBatchDimension();
-  m_bindingsCount = m_inferenceEngine->getNbBindings();
+  m_ioTensorsCount = m_inferenceEngine->getNbIOTensors();
   if (!m_hasImplicitBatch) {
-    for (size_t i = 0; i < m_bindingsCount; i++) {
-      if (m_inferenceEngine->bindingIsInput(i)) {
-        nvinfer1::Dims dims_i(m_inferenceEngine->getBindingDimensions(i));
+    for (size_t i = 0; i < m_ioTensorsCount; i++) {
+      const char* name = m_inferenceEngine->getIOTensorName(i);
+      if (m_inferenceEngine->getTensorIOMode(name) == nvinfer1::TensorIOMode::kINPUT) {
+        nvinfer1::Dims dims_i(m_inferenceEngine->getTensorShape(name));
         nvinfer1::Dims4 inputDims{1, dims_i.d[1], dims_i.d[2], dims_i.d[3]};
-        m_inferenceContext->setBindingDimensions(i, inputDims);
+        m_inferenceContext->setInputShape(name, inputDims);
       }
     }
   }
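
The constructor loop above follows the standard TensorRT 8.5 enumeration idiom: iterate `getNbIOTensors()`, fetch each tensor name with `getIOTensorName()`, and filter inputs via `getTensorIOMode()`. A standalone sketch of the same idiom, assuming valid `engine` and `context` objects and NCHW inputs whose first dimension is the batch:

```cpp
#include "NvInfer.h"

// Sketch: pin every dynamic input to batch size 1 (TensorRT >= 8.5).
void fixInputShapes(const nvinfer1::ICudaEngine& engine,
                    nvinfer1::IExecutionContext& context) {
  for (int32_t i = 0; i < engine.getNbIOTensors(); ++i) {
    const char* name = engine.getIOTensorName(i);
    if (engine.getTensorIOMode(name) != nvinfer1::TensorIOMode::kINPUT) {
      continue;  // output shapes follow from shape propagation
    }
    nvinfer1::Dims dims = engine.getTensorShape(name);  // -1 marks dynamic dims
    dims.d[0] = 1;  // assumes dim 0 is the batch dimension
    context.setInputShape(name, dims);
  }
}
```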
@@ -299,7 +298,6 @@ TensorRTInferencer::TensorRTInferencer(const TensorRTInferenceParams& params)
   if (err != make_error_code(ErrorCode::SUCCESS)) {
     throw err;
   }
-  m_buffers.resize(m_bindingsCount);
 }
 
 // Set input layer tensor
@@ -309,7 +307,8 @@ std::error_code TensorRTInferencer::setInput(const TensorBase& trtInputBuffer,
     return ErrorCode::INVALID_ARGUMENT;
   }
   LayerInfo layer = m_modelInfo.inputLayers[inputLayerName];
-  m_buffers[layer.index] = trtInputBuffer.getData();
+  m_inferenceContext->setTensorAddress(inputLayerName.c_str(),
+                                       trtInputBuffer.getData());
   return ErrorCode::SUCCESS;
 }
 
@@ -320,7 +319,8 @@ std::error_code TensorRTInferencer::setOutput(TensorBase& trtOutputBuffer,
     return ErrorCode::INVALID_ARGUMENT;
   }
   LayerInfo layer = m_modelInfo.outputLayers[outputLayerName];
-  m_buffers[layer.index] = trtOutputBuffer.getData();
+  m_inferenceContext->setTensorAddress(outputLayerName.c_str(),
+                                       trtOutputBuffer.getData());
   return ErrorCode::SUCCESS;
 }
 
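
With the `enqueueV3` execution path there is no bindings array: each I/O tensor's device pointer is registered on the execution context by name, which is what `setInput` and `setOutput` now do. A minimal sketch; the tensor names `"input"` and `"output"` are placeholders and must match the names the engine actually reports:

```cpp
#include "NvInfer.h"

// Sketch: bind device buffers by tensor name (TensorRT >= 8.5).
// `dIn` and `dOut` are assumed to be device pointers of the correct size.
bool bindBuffers(nvinfer1::IExecutionContext& context, void* dIn, void* dOut) {
  return context.setTensorAddress("input", dIn) &&
         context.setTensorAddress("output", dOut);
}
```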
@@ -334,18 +334,18 @@ ModelMetaData TensorRTInferencer::getModelMetaData() const {
 std::error_code TensorRTInferencer::infer(size_t batchSize) {
   bool err = true;
   if (!m_hasImplicitBatch) {
-    size_t bindingsCount = m_inferenceEngine->getNbBindings();
-    for (size_t i = 0; i < bindingsCount; i++) {
-      if (m_inferenceEngine->bindingIsInput(i)) {
-        nvinfer1::Dims dims_i(m_inferenceEngine->getBindingDimensions(i));
-        nvinfer1::Dims4 inputDims{static_cast<int>(batchSize), dims_i.d[1],
-                                  dims_i.d[2], dims_i.d[3]};
-        m_inferenceContext->setBindingDimensions(i, inputDims);
+    size_t ioTensorsCount = m_inferenceEngine->getNbIOTensors();
+    for (size_t i = 0; i < ioTensorsCount; i++) {
+      const char* name = m_inferenceEngine->getIOTensorName(i);
+      if (m_inferenceEngine->getTensorIOMode(name) == nvinfer1::TensorIOMode::kINPUT) {
+        nvinfer1::Dims dims_i(m_inferenceEngine->getTensorShape(name));
+        nvinfer1::Dims4 inputDims{static_cast<int32_t>(batchSize), dims_i.d[1], dims_i.d[2], dims_i.d[3]};
+        m_inferenceContext->setInputShape(name, inputDims);
       }
     }
-    err = m_inferenceContext->enqueueV2(&m_buffers[0], m_cudaStream, nullptr);
+    err = m_inferenceContext->enqueueV3(m_cudaStream);
   } else {
-    err = m_inferenceContext->enqueue(m_maxBatchSize, &m_buffers[0], m_cudaStream, nullptr);
+    return InferencerErrorCode::INVALID_ARGUMENT;
   }
   if (!err) {
     return InferencerErrorCode::TENSORRT_INFERENCE_ERROR;
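
`enqueueV3` takes only the stream; input shapes and tensor addresses must be set on the context beforehand, and implicit-batch engines are no longer supported, hence the `INVALID_ARGUMENT` in the `else` branch above. A minimal end-to-end sketch, assuming shapes and addresses were already registered as shown earlier (the helper name is hypothetical):

```cpp
#include <cuda_runtime_api.h>

#include "NvInfer.h"

// Sketch: asynchronous execution with enqueueV3 (TensorRT >= 8.5).
bool runInference(nvinfer1::IExecutionContext& context, cudaStream_t stream) {
  if (!context.allInputDimensionsSpecified()) {
    return false;  // a dynamic input shape was never set
  }
  if (!context.enqueueV3(stream)) {
    return false;  // enqueue failed
  }
  return cudaStreamSynchronize(stream) == cudaSuccess;  // wait for completion
}
```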
@@ -360,27 +360,14 @@ std::error_code TensorRTInferencer::setCudaStream(cudaStream_t cudaStream) {
 }
 
 std::error_code TensorRTInferencer::unregister(std::string layerName) {
-  size_t index;
-  if (m_modelInfo.outputLayers.find(layerName) != m_modelInfo.outputLayers.end()) {
-    index = m_modelInfo.outputLayers[layerName].index;
-  } else if (m_modelInfo.inputLayers.find(layerName) != m_modelInfo.inputLayers.end()) {
-    index = m_modelInfo.inputLayers[layerName].index;
-  } else {
-    return ErrorCode::INVALID_ARGUMENT;
-  }
-  m_buffers[index] = nullptr;
   return ErrorCode::SUCCESS;
 }
 
 std::error_code TensorRTInferencer::unregister() {
-  for (size_t i = 0; i < m_buffers.size(); i++) {
-    m_buffers[i] = nullptr;
-  }
   return ErrorCode::SUCCESS;
 }
 
 TensorRTInferencer::~TensorRTInferencer() {
-  m_buffers.clear();
 }
 
 }  // namespace inferencer