Description
I tried to deserialize a model in C++ code like this:

In test.h:
#include <NvInfer.h>
#include <cuda_runtime.h>  // cudaError_t, cudaGetErrorName, cudaSetDevice
#include <cstring>         // std::memcpy (used by bytesToInteger below)
#include <string>
#include <vector>
#include <memory>

#define CHECK(call, resContent) check(call, __LINE__, __FILE__, resContent)

inline bool check(cudaError_t e, int iLine, const char* szFile, std::string& resContent) {
    if (e != cudaSuccess) {
        resContent = "CUDA runtime API error ";
        resContent += std::string(cudaGetErrorName(e));
        resContent += " at line " + std::to_string(iLine);
        resContent += " in file " + std::string(szFile);
        resContent += "\n";
        // std::cout << "CUDA runtime API error " << cudaGetErrorName(e) << " at line " << iLine << " in file " << szFile << std::endl;
        return false;
    }
    resContent = "";
    return true;
}
class TRTLogger : public nvinfer1::ILogger {
public:
    nvinfer1::ILogger::Severity reportableSeverity;
public:
    TRTLogger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kVERBOSE)
        : reportableSeverity(severity) {}
    void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override;
};
class B {
public:
    virtual ~B() = default;  // virtual destructor, since A is used through a B*
    virtual int modelLoad(const std::string& modelPath) = 0;
};
class A : public B {
public:
    int modelLoad(const std::string& modelPath) override;
    static TRTLogger s_Logger;
private:
    nvinfer1::ICudaEngine* m_engine = nullptr;
};
// Interpret the first 4 bytes of the buffer as a native-endian int.
// memcpy avoids unaligned reads, and "inline" prevents multiple-definition
// link errors, since this header is included from more than one .cpp file.
inline int bytesToInteger(const char* buffer) {
    int value = 0;
    std::memcpy(&value, buffer, sizeof(value));
    return value;
}

In test.cpp:
#include "test.h"
#include <iostream>
#include <stdexcept>  // std::runtime_error
#include <fstream>
#include <vector>
void TRTLogger::log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept {
    if (severity > reportableSeverity) {
        return;
    }
    switch (severity) {
    case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
        std::cout << "INTERNAL_ERROR: " << msg << std::endl;
        break;
    case nvinfer1::ILogger::Severity::kERROR:
        std::cout << "ERROR: " << msg << std::endl;
        break;
    case nvinfer1::ILogger::Severity::kWARNING:
        std::cout << "WARNING: " << msg << std::endl;
        break;
    case nvinfer1::ILogger::Severity::kINFO:
        std::cout << "INFO: " << msg << std::endl;
        break;
    default:
        std::cout << "VERBOSE: " << msg << std::endl;
        break;
    }
}
TRTLogger A::s_Logger = TRTLogger();
int A::modelLoad(const std::string& modelPath) {
    std::string tmpLogStr;
    bool isSuccess = CHECK(cudaSetDevice(0), tmpLogStr);
    if (!isSuccess) {
        throw std::runtime_error("cudaSetDevice failed in modelLoad: " + tmpLogStr);
    }
    std::ifstream engineFile(modelPath, std::ios::binary);
    if (!engineFile) {
        std::cout << "Failed opening engine file: " << modelPath << std::endl;
        return -1;
    }
    // get file size
    std::cout << "Parsing model file!" << std::endl;
    engineFile.seekg(0, engineFile.end);
    long int fsize = engineFile.tellg();
    engineFile.seekg(0, engineFile.beg);
    // read the 4-byte length prefix of the metadata header
    char metaLenBytes[4];
    engineFile.read(metaLenBytes, 4);
    int metaLen = bytesToInteger(metaLenBytes);
    // read past the metadata JSON string (TODO: actually parse it)
    std::vector<char> metaBytes(metaLen);
    engineFile.read(metaBytes.data(), metaLen);
    // the rest of the file is the serialized engine
    long int engineSize = fsize - metaLen - 4;
    if (engineSize <= 0) {
        std::cout << "Failed getting serialized engine!" << std::endl;
        return -1;
    }
    std::vector<char> engineStr(engineSize);
    engineFile.read(engineStr.data(), engineSize);
    if (!engineFile) {
        std::cout << "Failed getting serialized engine!" << std::endl;
        return -1;
    }
    std::cout << "Succeeded getting serialized engine!" << std::endl;
    // create inference env, deserialize engine
    nvinfer1::IRuntime* m_runtime{nvinfer1::createInferRuntime(s_Logger)};
    m_engine = m_runtime->deserializeCudaEngine(engineStr.data(), engineStr.size());
    if (m_engine == nullptr) {
        std::cout << "Failed loading engine!" << std::endl;
        return -1;
    }
    return 0;
}
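As a side note (probably unrelated to the error itself): modelLoad never releases m_runtime, and m_engine is a raw pointer as well. A minimal sketch of the same ownership using std::unique_ptr, assuming TensorRT 8.x where objects are released with plain delete instead of the deprecated destroy(), could look like this (LoadedEngine and loadEngine are hypothetical names, not part of my actual code):

#include <NvInfer.h>
#include <memory>
#include <vector>

// Hypothetical helper: keep the runtime alive alongside the engine,
// since the engine must not outlive the runtime that created it.
struct LoadedEngine {
    std::unique_ptr<nvinfer1::IRuntime> runtime;
    std::unique_ptr<nvinfer1::ICudaEngine> engine;
};

inline LoadedEngine loadEngine(const std::vector<char>& blob, nvinfer1::ILogger& logger) {
    LoadedEngine result;
    result.runtime.reset(nvinfer1::createInferRuntime(logger));
    if (result.runtime) {
        result.engine.reset(result.runtime->deserializeCudaEngine(blob.data(), blob.size()));
    }
    return result;  // both objects are deleted automatically when the owner goes out of scope
}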
In main.cpp:

#include <string>
#include "test.h"
int main() {
    std::string modelPath("../ResNet34_trackerOCR_36_450_20230627_half.engine");
    B* a = new A();
    int retCode = a->modelLoad(modelPath);
    delete a;
    return retCode;
}

When I run this code with the engine on a Tesla V100 GPU, I get the following log:
Parsing model file!
Succeeded getting serialized engine!
INFO: Loaded engine size: 47 MiB
ERROR: 1: [dispatchStubs.cpp::deserializeEngine::14] Error Code 1: Internal Error (Unexpected call to stub)
Failed loading engine!

I have also tried putting the contents of test.h and test.cpp directly into main.cpp; the deserialization still fails there. This confuses me; I don't know what I'm doing wrong.
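To narrow things down, here is a minimal standalone program that isolates just the deserialization call. The file name plain.engine is hypothetical and assumes a raw serialized engine with no metadata prefix:

#include <NvInfer.h>
#include <fstream>
#include <iostream>
#include <iterator>
#include <vector>

// Print every TensorRT message so nothing is filtered out.
class MiniLogger : public nvinfer1::ILogger {
    void log(Severity severity, const char* msg) noexcept override {
        std::cout << msg << std::endl;
    }
};

int main() {
    MiniLogger logger;
    // "plain.engine" is a hypothetical raw serialized engine file.
    std::ifstream f("plain.engine", std::ios::binary);
    std::vector<char> blob((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);
    nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(blob.data(), blob.size());
    std::cout << (engine ? "deserialization OK" : "deserialization failed") << std::endl;
    delete engine;
    delete runtime;
    return 0;
}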
In addition, I ran the corresponding test in a Python program, and deserialization worked normally there.
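Since the error comes from a dispatch stub inside the library, I also want to rule out a mismatch between my headers and the libnvinfer picked up at run time. As far as I know, getInferLibVersion() (declared in the TensorRT runtime headers) reports the version of the loaded library, so a small check like this should reveal a mismatch if there is one:

#include <NvInfer.h>
#include <iostream>

int main() {
    // NV_TENSORRT_VERSION: the version the headers were compiled against.
    // getInferLibVersion(): the version of the library loaded at run time.
    std::cout << "header version:  " << NV_TENSORRT_VERSION << std::endl;
    std::cout << "library version: " << getInferLibVersion() << std::endl;
    return 0;
}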
Environment
TensorRT Version: 8.6.1.6
NVIDIA GPU: Tesla V100
NVIDIA Driver Version: 515.43.04
CUDA Version: 11.7.99
CUDNN Version: 8.9.2
Operating System: Ubuntu 16.04
Python Version (if applicable): 3.8
Tensorflow Version (if applicable): not used
PyTorch Version (if applicable): 1.13.1
Baremetal or Container (if so, version): baremetal (no container)
Relevant Files
For the related code and model, please refer to this link.
Steps To Reproduce
Commands or scripts:
$ mkdir build
$ cd build
$ cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ..
$ make
$ cd ../bin
$ ./main

Have you tried the latest release?: Yes
Can this model run on other frameworks? For example run ONNX model with ONNXRuntime (polygraphy run <model.onnx> --onnxrt): No. I think the ONNX model itself is fine.