@@ -11,7 +11,7 @@ namespace yolox_cpp
     {
        cudaSetDevice(this->DEVICE_);
        // create a model using the API directly and serialize it to a stream
-       char *trtModelStream{nullptr};
+       std::vector<char> trtModelStream;
        size_t size{0};
 
        std::ifstream file(path_to_engine, std::ios::binary);
@@ -20,9 +20,9 @@ namespace yolox_cpp
            file.seekg(0, file.end);
            size = file.tellg();
            file.seekg(0, file.beg);
-           trtModelStream = new char[size];
+           trtModelStream.resize(size);
-           assert(trtModelStream);
+           assert(!trtModelStream.empty());
-           file.read(trtModelStream, size);
+           file.read(trtModelStream.data(), size);
            file.close();
        }
        else
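Reviewer note: switching from a raw `new char[]` buffer to `std::vector<char>` makes the serialized engine blob RAII-managed, which is what lets the `delete[]` in the next hunk disappear. A minimal standalone sketch of the same read pattern (the helper name `read_engine_file` is illustrative, not part of this patch):

#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>

// Read a serialized TensorRT engine into an owning, self-freeing buffer.
std::vector<char> read_engine_file(const std::string &path)
{
    std::ifstream file(path, std::ios::binary);
    if (!file.good())
        throw std::runtime_error("could not open engine file: " + path);

    file.seekg(0, file.end);          // seek to the end to learn the size
    const std::streamsize size = file.tellg();
    file.seekg(0, file.beg);          // rewind for the actual read

    std::vector<char> blob(static_cast<size_t>(size));
    file.read(blob.data(), size);     // contiguous storage, like the old char*
    return blob;                      // no delete[] needed; the vector frees itself
}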
@@ -34,11 +34,10 @@ namespace yolox_cpp
 
        this->runtime_ = std::unique_ptr<IRuntime>(createInferRuntime(this->gLogger_));
        assert(this->runtime_ != nullptr);
-       this->engine_ = std::unique_ptr<ICudaEngine>(this->runtime_->deserializeCudaEngine(trtModelStream, size));
+       this->engine_ = std::unique_ptr<ICudaEngine>(this->runtime_->deserializeCudaEngine(trtModelStream.data(), size));
        assert(this->engine_ != nullptr);
        this->context_ = std::unique_ptr<IExecutionContext>(this->engine_->createExecutionContext());
        assert(this->context_ != nullptr);
-       delete[] trtModelStream;
 
        const auto input_name = this->engine_->getIOTensorName(this->inputIndex_);
        const auto input_dims = this->engine_->getTensorShape(input_name);
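For context, deserialization now reads from the vector's `data()` pointer and the blob simply dies at end of scope, exactly where the old code called `delete[]`. A hedged sketch of that ownership chain, assuming TensorRT 8.x+ where `delete` on the API interfaces is supported (as the patch's own `std::unique_ptr` usage implies; `build_context` is an illustrative name):

#include <memory>
#include <vector>
#include <NvInfer.h>

using namespace nvinfer1;

// Deserialize an engine blob; each object must outlive the ones created from it.
std::unique_ptr<IExecutionContext> build_context(ILogger &logger,
                                                 const std::vector<char> &blob,
                                                 std::unique_ptr<IRuntime> &runtime,
                                                 std::unique_ptr<ICudaEngine> &engine)
{
    runtime.reset(createInferRuntime(logger));
    if (!runtime)
        return nullptr;

    // The engine copies what it needs from the blob during deserialization,
    // so the vector may be released as soon as this call returns.
    engine.reset(runtime->deserializeCudaEngine(blob.data(), blob.size()));
    if (!engine)
        return nullptr;

    return std::unique_ptr<IExecutionContext>(engine->createExecutionContext());
}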
@@ -74,8 +73,8 @@ namespace yolox_cpp
        assert(this->context_->setInputShape(input_name, input_dims));
        assert(this->context_->allInputDimensionsSpecified());
 
-       assert(this->context_->setTensorAddress(input_name, this->inference_buffers_[this->inputIndex_]));
-       assert(this->context_->setTensorAddress(output_name, this->inference_buffers_[this->outputIndex_]));
+       assert(this->context_->setInputTensorAddress(input_name, this->inference_buffers_[this->inputIndex_]));
+       assert(this->context_->setOutputTensorAddress(output_name, this->inference_buffers_[this->outputIndex_]));
 
        // Prepare GridAndStrides
        if (this->p6_)
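Splitting the generic `setTensorAddress` into the direction-specific calls lets the API enforce intent: the input overload takes `void const*`, so the context will only read from it. A minimal sketch of binding one input and one output by name, under the same TensorRT version assumptions as this patch (`bind_io` and the buffer parameters are illustrative):

#include <NvInfer.h>

using namespace nvinfer1;

// Bind device buffers to the engine's I/O tensors by name rather than index.
bool bind_io(IExecutionContext &ctx, const ICudaEngine &engine,
             void *input_device_buf, void *output_device_buf)
{
    const char *input_name = engine.getIOTensorName(0);   // assumes tensor 0 is the input
    const char *output_name = engine.getIOTensorName(1);  // assumes tensor 1 is the output

    // Input overload accepts void const*: the context only reads from this buffer.
    if (!ctx.setInputTensorAddress(input_name, input_device_buf))
        return false;
    // Output overload takes a writable pointer for the result.
    return ctx.setOutputTensorAddress(output_name, output_device_buf);
}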
@@ -126,17 +125,19 @@ namespace yolox_cpp
        // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
        CHECK(
            cudaMemcpyAsync(
-               this->inference_buffers_[this->inputIndex_], input,
+               this->inference_buffers_[this->inputIndex_],
+               input,
                3 * this->input_h_ * this->input_w_ * sizeof(float),
                cudaMemcpyHostToDevice, stream));
 
-       bool success = context_->enqueueV3(stream);
+       bool success = context_->executeV2(this->inference_buffers_);
        if (!success)
            throw std::runtime_error("failed inference");
 
        CHECK(
            cudaMemcpyAsync(
-               output, this->inference_buffers_[this->outputIndex_],
+               output,
+               this->inference_buffers_[this->outputIndex_],
                this->output_size_ * sizeof(float),
                cudaMemcpyDeviceToHost, stream));
 
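Worth noting for reviewers: `executeV2` runs synchronously over the bindings array, unlike the stream-ordered `enqueueV3` it replaces here. A hedged host-side sketch of the full copy/execute/copy round trip, with explicit synchronization so the H2D copy is finished before the synchronous execute (`run_inference` and its parameters are illustrative, not this repo's API):

#include <stdexcept>
#include <cuda_runtime_api.h>
#include <NvInfer.h>

// Copy in, run the network, copy out. executeV2 itself is synchronous,
// so the async copies are fenced with explicit stream synchronization.
void run_inference(nvinfer1::IExecutionContext &ctx, void *bindings[],
                   const float *host_input, size_t input_bytes,
                   float *host_output, size_t output_bytes,
                   int input_index, int output_index, cudaStream_t stream)
{
    if (cudaMemcpyAsync(bindings[input_index], host_input, input_bytes,
                        cudaMemcpyHostToDevice, stream) != cudaSuccess)
        throw std::runtime_error("H2D copy failed");
    cudaStreamSynchronize(stream);  // input must be resident before the sync execute

    if (!ctx.executeV2(bindings))
        throw std::runtime_error("failed inference");

    if (cudaMemcpyAsync(host_output, bindings[output_index], output_bytes,
                        cudaMemcpyDeviceToHost, stream) != cudaSuccess)
        throw std::runtime_error("D2H copy failed");
    cudaStreamSynchronize(stream);  // wait until the result is on the host
}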