
Deserialization failure with TensorRT 8.6.1.6 when running in C++ code on GPU V100 #3307

@mucaoshen

Description

I tried to deserialize the model in C++ code like this:
In test.h

#include <NvInfer.h>
#include <cuda_runtime_api.h> // cudaError_t, cudaSetDevice, cudaGetErrorName
#include <cstring>            // std::memcpy
#include <string>
#include <vector>
#include <memory>

#define CHECK(call, resContent) check(call, __LINE__, __FILE__, resContent)

inline bool check(cudaError_t e, int iLine, const char *szFile, std::string& resContent) {
	if (e != cudaSuccess) {
		resContent = "CUDA runtime API error ";
		resContent += std::string(cudaGetErrorName(e));
		resContent += " at line " + std::to_string(iLine);
		resContent += " in file " + std::string(szFile);
		resContent += "\n";
		// std::cout << "CUDA runtime API error " << cudaGetErrorName(e) << " at line " << iLine << " in file " << szFile << std::endl;
        return false;
	}
	resContent = "";
	return true;
};

class TRTLogger: public nvinfer1::ILogger {
public:
	nvinfer1::ILogger::Severity reportableSeverity;

public:
	TRTLogger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kVERBOSE): reportableSeverity(severity) {
	}
	void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override;

};

class B {
	public:
	virtual ~B() = default; // virtual destructor so a derived object can be deleted through a B*
	virtual int modelLoad(const std::string& m_modelPath) = 0;

};

class A: public B {
	public:
	int modelLoad(const std::string& m_modelPath) override;
	static TRTLogger s_Logger;
	private:
	nvinfer1::ICudaEngine* m_engine = nullptr;
};

// Interpret the first four bytes of the buffer as an int (host byte order).
// Marked inline because this header is included from more than one translation unit.
inline int bytesToInteger(const char* buffer) {
	int value = 0;
	std::memcpy(&value, buffer, sizeof(value));
	return value;
}

In test.cpp

#include "test.h"
#include <iostream>
#include <iostream>
#include <fstream>
#include <vector>



void TRTLogger::log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept {
	if (severity > reportableSeverity) {
		return;
	}
	switch (severity)
	{
	case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
		std::cout<<"INTERNAL_ERROR: " + std::string(msg)<<std::endl;
		break;

	case nvinfer1::ILogger::Severity::kERROR:
		std::cout<<"ERROR: " + std::string(msg)<<std::endl;
		break;

	case nvinfer1::ILogger::Severity::kWARNING:
		std::cout<<"WARNING: " + std::string(msg)<<std::endl;
		break;

	case nvinfer1::ILogger::Severity::kINFO:
		std::cout<<"INFO: " + std::string(msg)<<std::endl;
		break;
	
	default:
		std::cout<<"VERBOSE: " + std::string(msg)<<std::endl;
		break;
	}
}

TRTLogger A::s_Logger = TRTLogger();
int A::modelLoad(const std::string& m_modelPath) {
	std::string tmpLogStr;
	bool isSuccess = CHECK(cudaSetDevice(0), tmpLogStr);
	if (!isSuccess) {
		throw std::runtime_error("cuda set device in modelLoad unsuccessfully : " + tmpLogStr);
	}

	std::ifstream engineFile(m_modelPath, std::ios::binary);
	if (!engineFile.good()) {
		std::cout<<"Failed opening engine file: " + m_modelPath<<std::endl;
		return -1;
	}
	long int fsize = 0;
	// get file size
	std::cout<<"Parsing model file!"<<std::endl;
	engineFile.seekg(0, engineFile.end);
	fsize = engineFile.tellg();
	engineFile.seekg(0, engineFile.beg);
	// get meta info
	char* metaLenBytes;
	metaLenBytes = (char*)malloc(4);
	engineFile.read(metaLenBytes, 4);
	int metaLen = bytesToInteger(metaLenBytes);
	if (metaLenBytes != nullptr) free(metaLenBytes);
	// TODO: get meta json str
	engineFile.seekg(4, engineFile.beg);
	char* metaBytes;
	metaBytes = (char*)malloc(metaLen);
	engineFile.read(metaBytes, metaLen);
	if (metaBytes != nullptr) free(metaBytes);
	// get model info
	std::vector<char> engineStr(fsize - metaLen - 4);
	engineFile.seekg(metaLen + 4, engineFile.beg);
	engineFile.read(engineStr.data(), fsize - metaLen - 4);

	if (engineStr.size() == 0) {
		std::cout<<"Failed getting serialized engine!"<<std::endl;
		engineFile.close();
		return -1;
	}
	engineFile.close();
	std::cout<<"Succeeded getting serialized engine!"<<std::endl;

	// create inference env, deserialize engine
	nvinfer1::IRuntime* m_runtime {nvinfer1::createInferRuntime(s_Logger)};
	m_engine = m_runtime->deserializeCudaEngine(engineStr.data(), engineStr.size());
	if (m_engine == nullptr) {
		std::cout<<"Failed loading engine!"<<std::endl;
		return -1;
	}
	return 0;
}

and in main.cpp

#include <string>
#include "test.h"


int main() {
	std::string modelPath("../ResNet34_trackerOCR_36_450_20230627_half.engine");
	B* a = new A();
	int retCode = a->modelLoad(modelPath);
}

When I run the code with the model engine on a V100 GPU, I get an error like this:

Parsing model file!
Succeeded getting serialized engine!
INFO: Loaded engine size: 47 MiB
ERROR: 1: [dispatchStubs.cpp::deserializeEngine::14] Error Code 1: Internal Error (Unexpected call to stub)
Failed loading engine!

I have also tried putting the contents of test.cpp and test.h directly into main.cpp; the deserialization is still unsuccessful.
This confuses me, and I don't know what I'm doing wrong.
In addition, I ran the corresponding test in a Python program, and there it worked normally.
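
To help isolate the failure, a minimal standalone deserialization check like the one below may be useful. This is only a sketch: it assumes the same engine-file layout that modelLoad above expects (a 4-byte metadata length, the metadata JSON, then the serialized engine), and the file name minimal_check.cpp and the class StdoutLogger are placeholders rather than part of the original project.

// minimal_check.cpp - standalone sanity check (sketch only, not part of the original project)
#include <NvInfer.h>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <vector>

namespace {
// Minimal logger that prints everything TensorRT reports.
class StdoutLogger : public nvinfer1::ILogger {
	void log(Severity, const char* msg) noexcept override {
		std::cout << msg << std::endl;
	}
};
}

int main(int argc, char** argv) {
	if (argc < 2) {
		std::cout << "usage: ./minimal_check <engine file>" << std::endl;
		return -1;
	}
	// Read the whole file into memory.
	std::ifstream f(argv[1], std::ios::binary);
	std::vector<char> blob((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
	if (blob.size() < sizeof(int)) {
		std::cout << "engine file is missing or too small" << std::endl;
		return -1;
	}
	// Assumption: the file starts with a 4-byte metadata length followed by the metadata JSON,
	// exactly as modelLoad() expects; strip both before handing the bytes to TensorRT.
	int metaLen = 0;
	std::memcpy(&metaLen, blob.data(), sizeof(metaLen));
	const char* engineData = blob.data() + sizeof(metaLen) + metaLen;
	size_t engineSize = blob.size() - sizeof(metaLen) - metaLen;

	StdoutLogger logger;
	nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);
	nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(engineData, engineSize);
	std::cout << (engine != nullptr ? "deserialization OK" : "deserialization FAILED") << std::endl;
	return engine != nullptr ? 0 : -1;
}

If this standalone program, built and linked the same way as the project above, still prints the same "Unexpected call to stub" error, then the problem is independent of the wrapper classes and more likely comes from the engine file itself or from how the executable is linked against the TensorRT libraries.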

Environment

TensorRT Version: 8.6.1.6

NVIDIA GPU: Tesla V100

NVIDIA Driver Version: 515.43.04

CUDA Version: 11.7.99

CUDNN Version: 8.9.2

Operating System: Ubuntu 16.04

Python Version (if applicable): 3.8

TensorFlow Version (if applicable): not used

PyTorch Version (if applicable): 1.13.1

Baremetal or Container (if so, version): no container used

Relevant Files

For the related code and model, please refer to this link.

Steps To Reproduce

Commands or scripts:

$ mkdir build
$ cd build
$ cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ..
$ make
$ cd ../bin
$ ./main

Have you tried the latest release?: Yes

Can this model run on other frameworks? For example run ONNX model with ONNXRuntime (polygraphy run <model.onnx> --onnxrt): No. I think the ONNX model is OK.
