Description
bool MyTensorRT::infer(const std::string& imagePath)
{
    if (!mEngine) return false;

    auto context = std::unique_ptr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
    if (!context) return false;

    // Find an input and an output tensor by IO mode.
    // Note: if the engine has more than one input or output, only the last of each is kept.
    int nbBindings = mEngine->getNbIOTensors();
    int inputIndex = -1, outputIndex = -1;
    for (int i = 0; i < nbBindings; ++i) {
        auto name = mEngine->getIOTensorName(i);
        std::cout << "nbBindings name: " << name << std::endl;
        if (mEngine->getTensorIOMode(name) == nvinfer1::TensorIOMode::kINPUT)
            inputIndex = i;
        else if (mEngine->getTensorIOMode(name) == nvinfer1::TensorIOMode::kOUTPUT)
            outputIndex = i;
    }
    if (inputIndex < 0 || outputIndex < 0) return false;

    char const* input_name = mEngine->getIOTensorName(inputIndex);
    printf("input dtype:%d\n", static_cast<int>(mEngine->getTensorDataType(input_name)));
    assert(mEngine->getTensorDataType(input_name) == nvinfer1::DataType::kFLOAT);
    nvinfer1::Dims4 input_shape = {1, 3, 224, 224};
    context->setInputShape(input_name, input_shape);
    size_t input_size = getMemorySize(input_shape, sizeof(float));
    printf("size: %zu\n", input_size);

    char const* output_name = mEngine->getIOTensorName(outputIndex);
    printf("output dtype:%d\n", static_cast<int>(mEngine->getTensorDataType(output_name)));
    assert(mEngine->getTensorDataType(output_name) == nvinfer1::DataType::kFLOAT);
    auto output_shape = context->getTensorShape(output_name);
    size_t output_size = getMemorySize(output_shape, sizeof(float));

    // Allocate device memory for the input and output tensors.
    void* input_mem{nullptr};
    if (cudaMalloc(&input_mem, input_size) != cudaSuccess)
    {
        fprintf(stderr, "ERROR: cudaMalloc for input_mem failed. Size=%zu bytes.\n", input_size);
        return false;
    }
    void* output_mem{nullptr};
    if (cudaMalloc(&output_mem, output_size) != cudaSuccess)
    {
        fprintf(stderr, "ERROR: cudaMalloc for output_mem failed. Size=%zu bytes.\n", output_size);
        cudaFree(input_mem);
        return false;
    }

    // Read and preprocess the image into NCHW float layout on the host.
    std::vector<float> hostInput;
    if (!readImageToNCHWFloat(imagePath, hostInput, input_shape.d[1], input_shape.d[2], input_shape.d[3], true)) {
        std::cerr << "Failed to read image\n";
        cudaFree(input_mem);
        cudaFree(output_mem);
        return false;
    }
    printf("image size: %zu\n", hostInput.size());
    // std::vector<float> hostInput(input_size / sizeof(float), 1.0f);
    // printf("image size: %zu\n", hostInput.size());

    cudaStream_t stream;
    if (cudaStreamCreate(&stream) != cudaSuccess)
    {
        std::cerr << "ERROR: cuda stream creation failed." << std::endl;
        cudaFree(input_mem);
        cudaFree(output_mem);
        return false;
    }

    // Bind the device buffers to the engine's tensors by name.
    context->setTensorAddress(input_name, input_mem);
    context->setTensorAddress(output_name, output_mem);

    // Copy image data to the input binding's device memory.
    if (cudaMemcpyAsync(input_mem, hostInput.data(), input_size, cudaMemcpyHostToDevice, stream) != cudaSuccess)
    {
        std::cerr << "ERROR: CUDA memory copy of input failed\n";
        cudaStreamDestroy(stream);
        cudaFree(input_mem);
        cudaFree(output_mem);
        return false;
    }
    cudaStreamSynchronize(stream);

    // Run TensorRT inference.
    bool status = context->enqueueV3(stream);
    printf("status: %d\n", status);
    if (!status)
    {
        std::cerr << "ERROR: TensorRT inference failed" << std::endl;
        // return false;
    }

    // Copy predictions back from the output binding's device memory.
    size_t output_num = output_size / sizeof(float);
    auto output_buffer = std::unique_ptr<float[]>(new float[output_num]);
    if (cudaMemcpyAsync(output_buffer.get(), output_mem, output_size, cudaMemcpyDeviceToHost, stream) != cudaSuccess)
    {
        std::cerr << "ERROR: CUDA memory copy of output failed, size = " << output_size << " bytes" << std::endl;
        cudaStreamDestroy(stream);
        cudaFree(input_mem);
        cudaFree(output_mem);
        return false;
    }
    cudaStreamSynchronize(stream);

    // Free CUDA resources.
    cudaStreamDestroy(stream);
    cudaFree(input_mem);
    cudaFree(output_mem);
    return true;
}
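
For context, the two helpers used above are not shown in the report. Below is a minimal sketch of what they are assumed to do, with signatures inferred from the call sites; the final bool argument of readImageToNCHWFloat is assumed to request BGR-to-RGB conversion, OpenCV is assumed for image loading, and the 1/255 scaling is a guess (the actual model may need mean/std normalization):

#include <NvInfer.h>
#include <functional>
#include <numeric>
#include <opencv2/opencv.hpp>

// Assumed: byte size of a tensor = product of all dims times the element size.
size_t getMemorySize(const nvinfer1::Dims& dims, size_t elemSize)
{
    return std::accumulate(dims.d, dims.d + dims.nbDims, elemSize,
                           std::multiplies<size_t>());
}

// Assumed: load an image, resize to (width, height), scale to [0, 1],
// optionally swap BGR->RGB, and repack interleaved HWC into planar NCHW floats.
bool readImageToNCHWFloat(const std::string& path, std::vector<float>& out,
                          int channels, int height, int width, bool bgr2rgb)
{
    cv::Mat img = cv::imread(path, cv::IMREAD_COLOR);
    if (img.empty() || channels != 3) return false;
    if (bgr2rgb) cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
    cv::resize(img, img, cv::Size(width, height));
    img.convertTo(img, CV_32FC3, 1.0 / 255.0);
    out.resize(static_cast<size_t>(channels) * height * width);
    // Wrap each output plane so cv::split writes directly into `out`.
    std::vector<cv::Mat> planes;
    for (int c = 0; c < channels; ++c)
        planes.emplace_back(height, width, CV_32FC1,
                            out.data() + static_cast<size_t>(c) * height * width);
    cv::split(img, planes);
    return true;
}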
output:
[TRT] Deserialization required 31975 microseconds.
[TRT] Total per-runner device persistent memory is 1536
[TRT] Total per-runner host persistent memory is 261856
[TRT] Allocated activation device memory of size 31109120
[TRT] CUDA lazy loading is enabled.
nbBindings name: input
nbBindings name: onnx::Softmax_504
nbBindings name: output
input dtype:0
size: 602112
output dtype:0
1, 6
image size: 150528
status: 0
ERROR: TensorRT inference failed
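
The log lists three IO tensors (input, onnx::Softmax_504, output), but the lookup loop keeps only the last output index, so onnx::Softmax_504 never receives a device address via setTensorAddress. enqueueV3 requires every IO tensor to be bound and returns false otherwise, which would explain the status: 0 seen here. A minimal sketch of binding all IO tensors instead (assuming all tensors are kFLOAT and all input shapes have already been set on the context):

// Sketch: give every IO tensor a device buffer before calling enqueueV3.
int nbIOTensors = mEngine->getNbIOTensors();
std::vector<void*> buffers(nbIOTensors, nullptr);
for (int i = 0; i < nbIOTensors; ++i)
{
    char const* name = mEngine->getIOTensorName(i);
    // Output shapes are derived once all input shapes are set.
    size_t bytes = getMemorySize(context->getTensorShape(name), sizeof(float));
    if (cudaMalloc(&buffers[i], bytes) != cudaSuccess) return false;
    context->setTensorAddress(name, buffers[i]);
}

Alternatively, if onnx::Softmax_504 is not actually needed, it may be possible to drop it from the model's outputs at ONNX export time.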