Skip to content

Commit daa0306

Browse files
authored
[TRT RTX EP] Memory map the engine buffer (microsoft#25909)
### Description
Switch from reading the engine cache file into a heap buffer (`std::ifstream::read`) to memory-mapping it, which reduces system memory usage. In my testing, this also made loading a ~4 GB model about 1.5x faster.
1 parent 2705d4b commit daa0306

File tree

1 file changed

+13
-7
lines changed

1 file changed

+13
-7
lines changed

onnxruntime/core/providers/nv_tensorrt_rtx/onnx_ctx_model_helper.cc

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -311,13 +311,19 @@ Status TensorRTCacheModelHandler::GetEpContextFromGraph(const Node& node) {
311311
". Please make sure engine cache is in the same directory or sub-directory of context model.");
312312
}
313313

314-
std::ifstream engine_file(engine_cache_path.string(), std::ios::binary | std::ios::in);
315-
engine_file.seekg(0, std::ios::end);
316-
size_t engine_size = engine_file.tellg();
317-
engine_file.seekg(0, std::ios::beg);
318-
std::unique_ptr<char[]> engine_buf{new char[engine_size]};
319-
engine_file.read((char*)engine_buf.get(), engine_size);
320-
*(trt_engine_) = std::unique_ptr<nvinfer1::ICudaEngine>(trt_runtime_->deserializeCudaEngine(engine_buf.get(), engine_size));
314+
size_t file_length = 0;
315+
auto path_str = ToPathString(engine_cache_path.string());
316+
317+
Env::MappedMemoryPtr engine_buf;
318+
const auto& env = GetDefaultEnv();
319+
ORT_RETURN_IF_ERROR(env.GetFileLength(path_str.c_str(), file_length));
320+
if (!file_length) {
321+
return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
322+
"Nv EP could not read engine from cache: " + engine_cache_path.string());
323+
}
324+
ORT_RETURN_IF_ERROR(env.MapFileIntoMemory(path_str.c_str(), 0, file_length, engine_buf));
325+
326+
*(trt_engine_) = std::unique_ptr<nvinfer1::ICudaEngine>(trt_runtime_->deserializeCudaEngine(engine_buf.get(), file_length));
321327
if (!(*trt_engine_)) {
322328
return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
323329
"Nv EP could not deserialize engine from cache: " + engine_cache_path.string());

0 commit comments

Comments
 (0)