Skip to content

Commit 56a93a0

Browse files
authored
Add Compile API to set the location for the context binary file (microsoft#25356)
Add Compile API ModelCompilationOptions_SetEpContextBinaryInformation to set the folder path and model name so that the EP can get the right place to dump the [model_name]_[ep].bin file.
1 parent 56078fe commit 56a93a0

File tree

8 files changed

+115
-3
lines changed

8 files changed

+115
-3
lines changed

include/onnxruntime/core/session/onnxruntime_c_api.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6886,6 +6886,24 @@ struct OrtCompileApi {
68866886
*/
68876887
ORT_API2_STATUS(ModelCompilationOptions_SetFlags, _In_ OrtModelCompilationOptions* model_compile_options,
68886888
size_t flags);
6889+
6890+
/** Sets information related to EP context binary file.
6891+
*
6892+
* EP uses this information to decide the location and context binary file name.
6893+
* Used when compiling a model whose input and output are both in memory buffers.
6894+
*
6895+
* \param[in] model_compile_options The OrtModelCompilationOptions instance.
6896+
* \param[in] output_directory Null terminated string of the path (wchar on Windows, char otherwise).
6897+
* \param[in] model_name Null terminated string of the model name (wchar on Windows, char otherwise).
6898+
*
6899+
* \snippet{doc} snippets.dox OrtStatus Return Value
6900+
*
6901+
* \since Version 1.23.
6902+
*/
6903+
ORT_API2_STATUS(ModelCompilationOptions_SetEpContextBinaryInformation,
6904+
_In_ OrtModelCompilationOptions* model_compile_options,
6905+
_In_ const ORTCHAR_T* output_directory,
6906+
_In_ const ORTCHAR_T* model_name);
68896907
};
68906908

68916909
/*

include/onnxruntime/core/session/onnxruntime_cxx_api.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,6 +1161,8 @@ struct ModelCompilationOptions : detail::Base<OrtModelCompilationOptions> {
11611161
size_t initializer_size_threshold); ///< Wraps OrtApi::ModelCompilationOptions_SetOutputModelExternalInitializersFile
11621162
ModelCompilationOptions& SetOutputModelBuffer(OrtAllocator* allocator, void** output_model_buffer_ptr,
11631163
size_t* output_model_buffer_size_ptr); ///< Wraps OrtApi::ModelCompilationOptions_SetOutputModelBuffer
1164+
ModelCompilationOptions& SetEpContextBinaryInformation(const ORTCHAR_T* output_directory,
1165+
const ORTCHAR_T* model_name); ///< Wraps OrtApi::ModelCompilationOptions_SetEpContextBinaryInformation
11641166
ModelCompilationOptions& SetFlags(size_t flags); ///< Wraps OrtApi::ModelCompilationOptions_SetFlags
11651167
};
11661168

include/onnxruntime/core/session/onnxruntime_cxx_inline.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -819,6 +819,15 @@ inline ModelCompilationOptions& ModelCompilationOptions::SetOutputModelPath(
819819
return *this;
820820
}
821821

822+
/// Forwards the EP context binary location (directory + model name) to the Compile API.
/// Throws via Ort::ThrowOnError if the underlying C API call returns a non-null status.
/// Returns *this so that calls can be chained.
inline ModelCompilationOptions& ModelCompilationOptions::SetEpContextBinaryInformation(
    const ORTCHAR_T* output_directory, const ORTCHAR_T* model_name) {
  OrtStatus* status = GetCompileApi().ModelCompilationOptions_SetEpContextBinaryInformation(
      this->p_, output_directory, model_name);
  Ort::ThrowOnError(status);
  return *this;
}
830+
822831
inline ModelCompilationOptions& ModelCompilationOptions::SetOutputModelExternalInitializersFile(
823832
const ORTCHAR_T* file_path, size_t initializer_size_threshold) {
824833
Ort::ThrowOnError(GetCompileApi().ModelCompilationOptions_SetOutputModelExternalInitializersFile(

onnxruntime/core/session/compile_api.cc

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,35 @@ ORT_API_STATUS_IMPL(OrtCompileAPI::ModelCompilationOptions_SetOutputModelPath,
128128
API_IMPL_END
129129
}
130130

131+
// C API entry point for OrtCompileApi::ModelCompilationOptions_SetEpContextBinaryInformation.
//
// Converts the caller-supplied ORTCHAR_T strings with PathToUTF8String / ToUTF8String and forwards
// them to onnxruntime::ModelCompilationOptions::SetEpContextBinaryInformation.
//
// Returns:
//   - nullptr on success,
//   - ORT_INVALID_ARGUMENT if any pointer is null or either string is empty,
//   - the status produced by ModelCompilationOptions::SetEpContextBinaryInformation on failure,
//   - ORT_NOT_IMPLEMENTED when built with ORT_MINIMAL_BUILD.
ORT_API_STATUS_IMPL(OrtCompileAPI::ModelCompilationOptions_SetEpContextBinaryInformation,
                    _In_ OrtModelCompilationOptions* ort_model_compile_options,
                    const ORTCHAR_T* output_directory,
                    const ORTCHAR_T* model_name) {
  API_IMPL_BEGIN
#if !defined(ORT_MINIMAL_BUILD)
  // Reject null pointers up front: building a string from a null character pointer is undefined
  // behaviour, and a null options handle would be dereferenced below.
  if (ort_model_compile_options == nullptr) {
    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Invalid model compilation options: pointer is null");
  }
  if (output_directory == nullptr) {
    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Invalid output directory: path is null");
  }
  if (model_name == nullptr) {
    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Invalid model name: string is null");
  }

  auto model_compile_options = reinterpret_cast<onnxruntime::ModelCompilationOptions*>(ort_model_compile_options);

  std::string output_dir = PathToUTF8String(output_directory);
  if (output_dir.empty()) {
    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Invalid output directory: path is empty");
  }

  std::string model_name_str = ToUTF8String(model_name);
  if (model_name_str.empty()) {
    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Invalid model name: string is empty");
  }

  ORT_API_RETURN_IF_STATUS_NOT_OK(model_compile_options->SetEpContextBinaryInformation(output_dir, model_name_str));
  return nullptr;
#else
  ORT_UNUSED_PARAMETER(ort_model_compile_options);
  ORT_UNUSED_PARAMETER(output_directory);
  ORT_UNUSED_PARAMETER(model_name);
  return OrtApis::CreateStatus(ORT_NOT_IMPLEMENTED, "Compile API is not supported in this build");
#endif  // !defined(ORT_MINIMAL_BUILD)
  API_IMPL_END
}
159+
131160
ORT_API_STATUS_IMPL(OrtCompileAPI::ModelCompilationOptions_SetOutputModelExternalInitializersFile,
132161
_In_ OrtModelCompilationOptions* ort_model_compile_options,
133162
const ORTCHAR_T* external_initializers_file_path,
@@ -248,6 +277,7 @@ static constexpr OrtCompileApi ort_compile_api = {
248277
// End of Version 22 - DO NOT MODIFY ABOVE
249278

250279
&OrtCompileAPI::ModelCompilationOptions_SetFlags,
280+
&OrtCompileAPI::ModelCompilationOptions_SetEpContextBinaryInformation,
251281
};
252282

253283
// checks that we don't violate the rule that the functions must remain in the slots they were originally assigned

onnxruntime/core/session/compile_api.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,7 @@ ORT_API_STATUS_IMPL(ModelCompilationOptions_SetEpContextEmbedMode, _In_ OrtModel
3030
ORT_API_STATUS_IMPL(CompileModel, _In_ const OrtEnv* env, _In_ const OrtModelCompilationOptions* model_options);
3131
ORT_API_STATUS_IMPL(ModelCompilationOptions_SetFlags, _In_ OrtModelCompilationOptions* model_options,
3232
size_t flags);
33+
ORT_API_STATUS_IMPL(ModelCompilationOptions_SetEpContextBinaryInformation, _In_ OrtModelCompilationOptions* model_compile_options,
34+
_In_ const ORTCHAR_T* output_dir, _In_ const ORTCHAR_T* model_name);
3335

3436
} // namespace OrtCompileAPI

onnxruntime/core/session/model_compilation_options.cc

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ Status ModelCompilationOptions::SetOutputModelPath(const std::string& output_mod
7272
if (log_manager != nullptr && log_manager->HasDefaultLogger()) {
7373
const logging::Logger& logger = log_manager->DefaultLogger();
7474
LOGS(logger, WARNING) << "Output model path length (" << ep_context_gen_options.output_model_file_path.size()
75-
<< ") exceeds limit of " << ConfigOptions::kMaxKeyLength << " characters."
76-
<< "ORT will still generated the expected output file, but EPs will see an empty "
75+
<< ") exceeds limit of " << ConfigOptions::kMaxValueLength << " characters."
76+
<< "ORT will still generate the expected output file, but EPs will see an empty "
7777
<< "output model path in SessionOption's ConfigOptions.";
7878
}
7979
}
@@ -98,6 +98,36 @@ Status ModelCompilationOptions::SetOutputModelBuffer(onnxruntime::AllocatorPtr a
9898
return Status::OK();
9999
}
100100

101+
// Records where an EP should place its context binary file.
//
// Joins `output_directory` and `model_name` into one path and stores it in the session's config
// options under kOrtSessionOptionEpContextFilePath. If the joined path is longer than
// ConfigOptions::kMaxValueLength, no config entry is added and a warning is logged instead
// (when a default logger is available).
//
// Returns INVALID_ARGUMENT if either argument is empty or `output_directory` fails the lexical
// directory check below; otherwise returns the result of AddConfigEntry, or OK.
Status ModelCompilationOptions::SetEpContextBinaryInformation(const std::string& output_directory,
                                                              const std::string& model_name) {
  if (output_directory.empty() || model_name.empty()) {
    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "output_dir or model_name is empty.");
  }

  const std::filesystem::path output_dir_path(output_directory);
  // Purely lexical check: a path is rejected when its last component is non-empty and has no
  // extension. A path ending in a directory separator (e.g. "./testdata/") passes.
  // NOTE(review): this rejects "/tmp/out" but accepts "out/model.onnx", which looks inverted for a
  // "is this a directory" test. Behaviour is kept as-is; confirm the intended rule.
  if (output_dir_path.has_filename() && output_dir_path.extension() == "") {
    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "output_dir is not a valid directory.");
  }

  // Build the joined path once and reuse it for both the length check and the config entry.
  // NOTE(review): on Windows, std::filesystem::path treats std::string as native narrow encoding,
  // not UTF-8 — confirm that non-ASCII paths survive this round trip.
  const std::string ctx_model_path = (output_dir_path / std::filesystem::path(model_name)).string();

  if (ctx_model_path.size() <= ConfigOptions::kMaxValueLength) {
    ORT_RETURN_IF_ERROR(session_options_.value.config_options.AddConfigEntry(kOrtSessionOptionEpContextFilePath,
                                                                             ctx_model_path.c_str()));
    return Status::OK();
  }

  // Path is too long to store as a config value: warn instead of failing.
  logging::LoggingManager* log_manager = env_.GetLoggingManager();
  if (log_manager != nullptr && log_manager->HasDefaultLogger()) {
    const logging::Logger& logger = log_manager->DefaultLogger();
    LOGS(logger, WARNING) << "output_directory length with model_name length together exceeds limit of "
                          << ConfigOptions::kMaxValueLength << " characters. "
                          << "ORT will still generate the expected output file, but EPs will see an empty "
                          << "output path in SessionOption's ConfigOptions.";
  }

  return Status::OK();
}
130+
101131
Status ModelCompilationOptions::SetEpContextEmbedMode(bool embed_ep_context_in_model) {
102132
ORT_RETURN_IF_ERROR(session_options_.value.config_options.AddConfigEntry(
103133
kOrtSessionOptionEpContextEmbedMode, embed_ep_context_in_model ? "1" : "0"));
@@ -146,7 +176,7 @@ Status ModelCompilationOptions::ResetOutputModelSettings() {
146176
ep_context_gen_options.output_model_buffer_ptr = nullptr;
147177
ep_context_gen_options.output_model_buffer_size_ptr = nullptr;
148178
ep_context_gen_options.output_model_buffer_allocator = nullptr;
149-
return session_options_.value.config_options.AddConfigEntry(kOrtSessionOptionEpContextFilePath, "");
179+
return Status::OK();
150180
}
151181

152182
Status ModelCompilationOptions::CheckInputModelSettings() const {

onnxruntime/core/session/model_compilation_options.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,16 @@ class ModelCompilationOptions {
7272
Status SetOutputModelBuffer(onnxruntime::AllocatorPtr allocator, void** output_model_buffer_ptr,
7373
size_t* output_model_buffer_size_ptr);
7474

75+
/// <summary>
76+
/// Sets information related to the EP context binary file.
77+
/// The EP uses this information to decide the location and name of the context binary file.
78+
/// Used when compiling a model whose input and output are both in memory buffers.
79+
/// </summary>
80+
/// <param name="output_directory">The folder path to the generated context binary file</param>
81+
/// <param name="model_name">Model name used to decide the context binary file name: [model_name]_[ep].bin</param>
82+
/// <returns>Status indicating potential error</returns>
83+
Status SetEpContextBinaryInformation(const std::string& output_directory, const std::string& model_name);
84+
7585
/// <summary>
7686
/// Enables or disables the embedding of EPContext binary data into the `ep_cache_context` attribute of EPContext
7787
/// nodes. Defaults to false (dumped to file).

onnxruntime/test/providers/qnn/qnn_ep_context_test.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,11 @@ TEST_F(QnnHTPBackendTests, CompileApi_FromSessionOptions_InputAndOutputModelsInB
509509
Ort::ModelCompilationOptions compile_options(*ort_env, session_options);
510510
compile_options.SetInputModelFromBuffer(reinterpret_cast<const void*>(model_data.data()), model_data.size());
511511
compile_options.SetOutputModelBuffer(allocator, &output_model_buffer, &output_model_buffer_size);
512+
std::string target_dir = "./testdata/";
513+
std::string model_name = "test_model_in_mem.onnx";
514+
auto pos = model_name.rfind(".onnx");
515+
std::string bin_file_name = model_name.substr(0, pos) + "_qnn.bin";
516+
compile_options.SetEpContextBinaryInformation(ToWideString(target_dir).c_str(), ToWideString(model_name).c_str());
512517
compile_options.SetEpContextEmbedMode(false);
513518

514519
// Compile the model.
@@ -519,12 +524,18 @@ TEST_F(QnnHTPBackendTests, CompileApi_FromSessionOptions_InputAndOutputModelsInB
519524
ASSERT_TRUE(output_model_buffer != nullptr);
520525
ASSERT_TRUE(output_model_buffer_size > 0);
521526

527+
ASSERT_TRUE(std::filesystem::exists(target_dir + bin_file_name)) << "expected context binary file should exist";
528+
522529
// Check that the compiled model has the expected number of EPContext nodes.
523530
CheckEpContextNodeCounts(output_model_buffer, output_model_buffer_size, 2, 2);
524531

532+
// Add session option "ep.context_file_path" so that the session can use it to locate the [model_name]_qnn.bin file
533+
std::string ctx_model = target_dir + model_name;
534+
session_options.AddConfigEntry(kOrtSessionOptionEpContextFilePath, ctx_model.c_str());
525535
// Should be able to create a session with the compiled model and the original session options.
526536
EXPECT_NO_THROW((Ort::Session(*ort_env, output_model_buffer, output_model_buffer_size, session_options)));
527537

538+
std::filesystem::remove(target_dir + bin_file_name);
528539
allocator.Free(output_model_buffer);
529540
}
530541
}

0 commit comments

Comments
 (0)