Skip to content

enqueueV3() returns false, I don't know how to debug it. I present my infer code below. #4649

@Kivw

Description

@Kivw

bool MyTensorRT::infer(const std::string& imagePath)
{
if (!mEngine) return false;

auto context = std::unique_ptr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if(!context) return false;

int nbBindings = mEngine->getNbIOTensors(); 
int inputIndex = -1, outputIndex = -1;

for (int i = 0; i < nbBindings; ++i) {
    auto name = mEngine->getIOTensorName(i);
    std::cout << "nbBindings name: " << name << std::endl;
    if (mEngine->getTensorIOMode(name) == nvinfer1::TensorIOMode::kINPUT)
        inputIndex = i;
    else if (mEngine->getTensorIOMode(name) == nvinfer1::TensorIOMode::kOUTPUT)
        outputIndex = i;
}
if (inputIndex < 0 || outputIndex < 0) return false;


char const* input_name = mEngine->getIOTensorName(inputIndex);
printf("input dtype:%d\n",mEngine->getTensorDataType(input_name));
assert(mEngine->getTensorDataType(input_name) == nvinfer1::DataType::kFLOAT);
nvinfer1::Dims4 input_shape = {1, 3, 224, 224};
context->setInputShape(input_name, input_shape); 
size_t input_size = getMemorySize(input_shape, sizeof(float));
printf("size: %d\n",input_size);

char const* output_name = mEngine->getIOTensorName(outputIndex);
printf("output dtype:%d\n",mEngine->getTensorDataType(output_name));
assert(mEngine->getTensorDataType(output_name) == nvinfer1::DataType::kFLOAT);
auto output_shape = context->getTensorShape(output_name); 
size_t output_size = getMemorySize(output_shape, sizeof(float));


void* input_mem{nullptr};
if(cudaMalloc(&input_mem, input_size) != cudaSuccess) 
{
     fprintf(stderr, "ERROR: cudaMalloc for input_mem failed. Size=%zu bytes. ",
        input_size);
    return false;  
}

void* output_mem{nullptr};
if (cudaMalloc(&output_mem, output_size) != cudaSuccess)
{
    fprintf(stderr, "ERROR: cudaMalloc for output_mem failed. Size=%zu bytes. ",
        output_size);
    return false;  
}

std::vector<float> hostInput;
if (!readImageToNCHWFloat(imagePath, hostInput, input_shape.d[1], input_shape.d[2], input_shape.d[3],true)) {
    std::cerr << "Failed to read image\n";
    return false;
}
printf("image size: %d\n", hostInput.size());
// std::vector<float> hostInput(input_size / sizeof(float), 1.0f);
// printf("image size: %d\n", hostInput.size());

cudaStream_t stream;
if (cudaStreamCreate(&stream) != cudaSuccess)
{
    std::cerr << "ERROR: cuda stream creation failed." << std::endl;
    return false;
}

context->setTensorAddress(input_name, input_mem);
context->setTensorAddress(output_name, output_mem);

// copy image data to input bindding memory
if(cudaMemcpyAsync(input_mem, hostInput.data(), input_size, cudaMemcpyHostToDevice, stream) != cudaSuccess)
{
    std::cerr << "ERROR: CUDA memory copy of input faile\n";
    return false;

}

cudaStreamSynchronize(stream);


// run tensorRT inference
bool status = context->enqueueV3(stream);
printf("status: %d\n",status);
if (!status)
{
    std::cerr  << "ERROR: TensorRT inference failed" << std::endl;
    // return false;
}

// Copy predictions from output binding memory
size_t output_num = output_size / sizeof(float);
auto output_buffer = std::unique_ptr<float[]>(new float[output_num]);

if (cudaMemcpyAsync(output_buffer.get(), output_mem, output_size, cudaMemcpyDeviceToHost, stream) != cudaSuccess)
{
    std::cerr  << "ERROR: CUDA memory copy of output failed, size = " << output_size << " bytes" << std::endl;
    return false;
}

cudaStreamSynchronize(stream);


// Free CUDA resources
cudaFree(input_mem);
cudaFree(output_mem);
return true;

}
output:
[TRT] Deserialization required 31975 microseconds.
[TRT] Total per-runner device persistent memory is 1536
[TRT] Total per-runner host persistent memory is 261856
[TRT] Allocated activation device memory of size 31109120
[TRT] CUDA lazy loading is enabled.
nbBindings name: input
nbBindings name: onnx::Softmax_504
nbBindings name: output
input dtype:0
size: 602112
output dtype:0
1, 6
image size: 150528
status: 0
ERROR: TensorRT inference failed

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions