
Deserialization failure with TensorRT 8.6.1.6 when running in C++ code on GPU V100 #3307

@mucaoshen

Description

I tried to deserialize the model in C++ code like this:
In test.h

#include <NvInfer.h>
#include <cuda_runtime_api.h> // cudaError_t, cudaSetDevice, cudaGetErrorName
#include <cstring>            // std::memcpy
#include <string>
#include <vector>
#include <memory>

#define CHECK(call, resContent) check(call, __LINE__, __FILE__, resContent)

inline bool check(cudaError_t e, int iLine, const char *szFile, std::string& resContent) {
	if (e != cudaSuccess) {
		resContent = "CUDA runtime API error ";
		resContent += std::string(cudaGetErrorName(e));
		resContent += " at line " + std::to_string(iLine);
		resContent += " in file " + std::string(szFile);
		resContent += "\n";
		// std::cout << "CUDA runtime API error " << cudaGetErrorName(e) << " at line " << iLine << " in file " << szFile << std::endl;
        return false;
	}
	resContent = "";
	return true;
};

class TRTLogger: public nvinfer1::ILogger {
public:
	nvinfer1::ILogger::Severity reportableSeverity;

public:
	TRTLogger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kVERBOSE): reportableSeverity(severity) {
	}
	void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override;

};

class B {
	public:
	virtual ~B() = default; // virtual destructor so a derived object can be deleted through a B*
	virtual int modelLoad(const std::string& m_modelPath) = 0;

};

class A: public B {
	public:
	int modelLoad(const std::string& m_modelPath) override;
	static TRTLogger s_Logger;
	private:
	nvinfer1::ICudaEngine* m_engine = nullptr;
};

// Interpret the first four bytes of the buffer as an int (host byte order).
// Marked inline because this header is included from more than one translation unit.
inline int bytesToInteger(const char* buffer) {
	int value = 0;
	std::memcpy(&value, buffer, sizeof(value));
	return value;
}

In test.cpp

#include "test.h"
#include <iostream>
#include <iostream>
#include <fstream>
#include <vector>



void TRTLogger::log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept {
	if (severity > reportableSeverity) {
		return;
	}
	switch (severity)
	{
	case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
		std::cout<<"INTERNAL_ERROR: " + std::string(msg)<<std::endl;
		break;

	case nvinfer1::ILogger::Severity::kERROR:
		std::cout<<"ERROR: " + std::string(msg)<<std::endl;
		break;

	case nvinfer1::ILogger::Severity::kWARNING:
		std::cout<<"WARNING: " + std::string(msg)<<std::endl;
		break;

	case nvinfer1::ILogger::Severity::kINFO:
		std::cout<<"INFO: " + std::string(msg)<<std::endl;
		break;
	
	default:
		std::cout<<"VERBOSE: " + std::string(msg)<<std::endl;
		break;
	}
}

TRTLogger A::s_Logger = TRTLogger();
int A::modelLoad(const std::string& m_modelPath) {
	std::string tmpLogStr;
	bool isSuccess = CHECK(cudaSetDevice(0), tmpLogStr);
	if (!isSuccess) {
		throw std::runtime_error("cuda set device in modelLoad unsuccessfully : " + tmpLogStr);
	}

	std::ifstream engineFile(m_modelPath, std::ios::binary);
	if (!engineFile.good()) {
		std::cout<<"Failed opening engine file: " + m_modelPath<<std::endl;
		return -1;
	}
	long int fsize = 0;
	// get file size
	std::cout<<"Parsing model file!"<<std::endl;
	engineFile.seekg(0, engineFile.end);
	fsize = engineFile.tellg();
	engineFile.seekg(0, engineFile.beg);
	// get meta info
	char* metaLenBytes;
	metaLenBytes = (char*)malloc(4);
	engineFile.read(metaLenBytes, 4);
	int metaLen = bytesToInteger(metaLenBytes);
	if (metaLenBytes != nullptr) free(metaLenBytes);
	// TODO: get meta json str
	engineFile.seekg(4, engineFile.beg);
	char* metaBytes;
	metaBytes = (char*)malloc(metaLen);
	engineFile.read(metaBytes, metaLen);
	if (metaBytes != nullptr) free(metaBytes);
	// get model info
	std::vector<char> engineStr(fsize - metaLen - 4);
	engineFile.seekg(metaLen + 4, engineFile.beg);
	engineFile.read(engineStr.data(), fsize - metaLen - 4);

	if (engineStr.size() == 0) {
		std::cout<<"Failed getting serialized engine!"<<std::endl;
		engineFile.close();
		return -1;
	}
	engineFile.close();
	std::cout<<"Succeeded getting serialized engine!"<<std::endl;

	// create inference env, deserialize engine
	nvinfer1::IRuntime* m_runtime {nvinfer1::createInferRuntime(s_Logger)};
	m_engine = m_runtime->deserializeCudaEngine(engineStr.data(), engineStr.size());
	if (m_engine == nullptr) {
		std::cout<<"Failed loading engine!"<<std::endl;
		return -1;
	}
	return 0;
}

and in main.cpp

#include <string>
#include "test.h"


int main() {
	std::string modelPath("../ResNet34_trackerOCR_36_450_20230627_half.engine");
	B* a = new A();
	int retCode = a->modelLoad(modelPath);
}

When I run the code with the model engine on a V100 GPU, I get an error like this:

Parsing model file!
Succeeded getting serialized engine!
INFO: Loaded engine size: 47 MiB
ERROR: 1: [dispatchStubs.cpp::deserializeEngine::14] Error Code 1: Internal Error (Unexpected call to stub)
Failed loading engine!

I have also tried putting the contents of test.cpp and test.h directly into main.cpp; the deserialization is still unsuccessful.
This confuses me, and I don't know what I'm doing wrong.
In addition, I ran the corresponding test in a Python program, and there it worked normally.
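
To help isolate the failure, a minimal standalone deserialization check like the one below may be useful. This is only a sketch: it assumes the same engine-file layout that modelLoad above expects (a 4-byte metadata length, the metadata JSON, then the serialized engine), and the file name minimal_check.cpp and the class StdoutLogger are placeholders rather than part of the original project.

// minimal_check.cpp - standalone sanity check (sketch only, not part of the original project)
#include <NvInfer.h>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <vector>

namespace {
// Minimal logger that prints everything TensorRT reports.
class StdoutLogger : public nvinfer1::ILogger {
	void log(Severity, const char* msg) noexcept override {
		std::cout << msg << std::endl;
	}
};
}

int main(int argc, char** argv) {
	if (argc < 2) {
		std::cout << "usage: ./minimal_check <engine file>" << std::endl;
		return -1;
	}
	// Read the whole file into memory.
	std::ifstream f(argv[1], std::ios::binary);
	std::vector<char> blob((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
	if (blob.size() < sizeof(int)) {
		std::cout << "engine file is missing or too small" << std::endl;
		return -1;
	}
	// Assumption: the file starts with a 4-byte metadata length followed by the metadata JSON,
	// exactly as modelLoad() expects; strip both before handing the bytes to TensorRT.
	int metaLen = 0;
	std::memcpy(&metaLen, blob.data(), sizeof(metaLen));
	const char* engineData = blob.data() + sizeof(metaLen) + metaLen;
	size_t engineSize = blob.size() - sizeof(metaLen) - metaLen;

	StdoutLogger logger;
	nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);
	nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(engineData, engineSize);
	std::cout << (engine != nullptr ? "deserialization OK" : "deserialization FAILED") << std::endl;
	return engine != nullptr ? 0 : -1;
}

If this standalone program, built and linked the same way as the project above, still prints the same "Unexpected call to stub" error, then the problem is independent of the wrapper classes and more likely comes from the engine file itself or from how the executable is linked against the TensorRT libraries.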

Environment

TensorRT Version: 8.6.1.6

NVIDIA GPU: Tesla V100

NVIDIA Driver Version: 515.43.04

CUDA Version: 11.7.99

CUDNN Version: 8.9.2

Operating System: Ubuntu 16.04

Python Version (if applicable): 3.8

TensorFlow Version (if applicable): not used

PyTorch Version (if applicable): 1.13.1

Baremetal or Container (if so, version): no container used

Relevant Files

For the related code and model, please refer to this link.

Steps To Reproduce

Commands or scripts:

$ mkdir build
$ cd build
$ cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ..
$ make
$ cd ../bin
$ ./main

Have you tried the latest release?: Yes

Can this model run on other frameworks? For example run ONNX model with ONNXRuntime (polygraphy run <model.onnx> --onnxrt): No. I think the ONNX model is OK.
