Skip to content

Commit cced33b

Browse files
Add model compilation in ORT perf test (#25797)
This PR adds model compilation support to the ORT perf test.

* Adds a `--compile_ep_context` flag to onnxruntime_perf_test. This generates an EP context model and prints the compilation time and perf statistics for the compiled model.
* Prints `Compile time cost` for EP context compilation.

Sample usage:

```sh
$ ./onnxruntime_perf_test -e trt-rtx -I -r 1 "/path/to/model.onnx" --compile_ep_context --compile_model_path "/path/to/model_ctx.onnx"
```

Output:

```sh
Compile time cost: 17.8012 s
Session creation time cost: 0.966619 s
First inference time cost: 8151 ms
Total inference time cost: 8.08084 s
Total inference requests: 1
Average inference time cost: 8080.84 ms
Total inference run time: 8.08085 s
Number of inferences per second: 0.123749
Avg CPU usage: 6 %
Peak working set size: 7861874688 bytes
Avg CPU usage:6
Peak working set size:7861874688
Runs:1
Min Latency: 8.08084 s
Max Latency: 8.08084 s
P50 Latency: 8.08084 s
P90 Latency: 8.08084 s
P95 Latency: 8.08084 s
P99 Latency: 8.08084 s
P999 Latency: 8.08084 s
```
1 parent d3096cd commit cced33b

File tree

3 files changed

+88
-15
lines changed

3 files changed

+88
-15
lines changed

onnxruntime/test/perftest/command_args_parser.cc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,9 @@ ABSL_FLAG(std::string, plugin_ep_options, "",
171171
"--plugin_ep_options \"ep_1_option_1_key|ep_1_option_1_value ...;;ep_3_option_1_key|ep_3_option_1_value ...;... \"");
172172
ABSL_FLAG(bool, list_ep_devices, false, "Prints all available device indices and their properties (including metadata). This option makes the program exit early without performing inference.\n");
173173
ABSL_FLAG(std::string, select_ep_devices, "", "Specifies a semicolon-separated list of device indices to add to the session and run with.");
174+
ABSL_FLAG(bool, compile_ep_context, DefaultPerformanceTestConfig().run_config.compile_ep_context, "Generate an EP context model");
175+
ABSL_FLAG(std::string, compile_model_path, "model_ctx.onnx", "The compiled model path for saving EP context model. Overwrites if already exists");
176+
ABSL_FLAG(bool, compile_binary_embed, DefaultPerformanceTestConfig().run_config.compile_binary_embed, "Embed binary blob within EP context node");
174177
ABSL_FLAG(bool, h, false, "Print program usage.");
175178

176179
namespace onnxruntime {
@@ -487,6 +490,16 @@ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int a
487490
if (!select_ep_devices.empty()) test_config.selected_ep_device_indices = select_ep_devices;
488491
}
489492

493+
// --compile_ep_context
494+
test_config.run_config.compile_ep_context = absl::GetFlag(FLAGS_compile_ep_context);
495+
496+
// --compile_model_path
497+
const auto& compile_model_path = absl::GetFlag(FLAGS_compile_model_path);
498+
test_config.run_config.compile_model_path = ToPathString(compile_model_path);
499+
500+
// --compile_binary_embed
501+
test_config.run_config.compile_binary_embed = absl::GetFlag(FLAGS_compile_binary_embed);
502+
490503
if (positional.size() == 2) {
491504
test_config.model_info.model_file_path = ToPathString(positional[1]);
492505
test_config.run_config.f_dump_statistics = true;

onnxruntime/test/perftest/main.cc

Lines changed: 72 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
using namespace onnxruntime;
1414
const OrtApi* g_ort = NULL;
1515

16+
int RunPerfTest(Ort::Env& env, const perftest::PerformanceTestConfig& test_config);
17+
Ort::Status CompileEpContextModel(const Ort::Env& env, const perftest::PerformanceTestConfig& test_config);
18+
1619
#ifdef _WIN32
1720
int real_main(int argc, wchar_t* argv[]) {
1821
#else
@@ -67,24 +70,27 @@ int real_main(int argc, char* argv[]) {
6770
return 0;
6871
}
6972

70-
std::random_device rd;
71-
perftest::PerformanceRunner perf_runner(env, test_config, rd);
73+
int status = 0;
7274

73-
// Exit if user enabled -n option so that user can measure session creation time
74-
if (test_config.run_config.exit_after_session_creation) {
75-
perf_runner.LogSessionCreationTime();
76-
return 0;
77-
}
75+
// EP context perf test
76+
if (test_config.run_config.compile_ep_context) {
77+
{
78+
std::cout << "\n> Compiling model...\n";
79+
auto compile_status = CompileEpContextModel(env, test_config);
7880

79-
auto status = perf_runner.Run();
80-
if (!status.IsOK()) {
81-
printf("Run failed:%s\n", status.ErrorMessage().c_str());
82-
return -1;
83-
}
84-
85-
perf_runner.SerializeResult();
81+
if (!compile_status.IsOK())
82+
return -1;
83+
}
8684

87-
return 0;
85+
{
86+
test_config.model_info.model_file_path = test_config.run_config.compile_model_path;
87+
status = RunPerfTest(env, test_config);
88+
}
89+
} else {
90+
// regular perf test
91+
status = RunPerfTest(env, test_config);
92+
}
93+
return status;
8894
}
8995

9096
#ifdef _WIN32
@@ -107,3 +113,54 @@ int main(int argc, char* argv[]) {
107113

108114
return retval;
109115
}
116+
117+
// Runs one perf-test pass for the model described by `test_config`.
//
// Constructs a PerformanceRunner from `env`, `test_config` and a local std::random_device.
// If run_config.exit_after_session_creation is set, only the session creation time is logged
// and no inference is run.
//
// Returns 0 on success (including the early-exit case) and -1 if the run reports a failure,
// in which case the error message is printed first.
int RunPerfTest(Ort::Env& env, const perftest::PerformanceTestConfig& test_config) {
  std::random_device seed_source;
  perftest::PerformanceRunner runner(env, test_config, seed_source);

  // -n option: the user only wants the session creation time, so skip inference entirely.
  const bool session_creation_only = test_config.run_config.exit_after_session_creation;
  if (session_creation_only) {
    runner.LogSessionCreationTime();
    return 0;
  }

  const auto run_status = runner.Run();
  if (run_status.IsOK()) {
    runner.SerializeResult();
    return 0;
  }

  printf("Run failed:%s\n", run_status.ErrorMessage().c_str());
  return -1;
}
136+
137+
// Compiles the input model into an EP context model and reports how long the compilation took.
//
// Values read from `test_config`:
//   - machine_config.provider_type_name : execution provider appended to the session options
//   - model_info.model_file_path        : path of the model to compile
//   - run_config.compile_model_path     : path the compiled model is written to
//   - run_config.compile_binary_embed   : forwarded to SetEpContextEmbedMode
//
// Prints "Compile time cost: <seconds> s" on success, or "Failed to compile model: <message>"
// on failure, and returns the status produced by Ort::CompileModel.
//
// NOTE(review): the execution provider is appended with an empty option map, so provider
// options given on the command line do not appear to reach the compilation step - confirm
// that this is intended.
Ort::Status CompileEpContextModel(const Ort::Env& env, const perftest::PerformanceTestConfig& test_config) {
  // Bind by reference: these values are only read, so copying them is unnecessary.
  const auto& output_ctx_model_path = test_config.run_config.compile_model_path;
  const auto& provider_name = test_config.machine_config.provider_type_name;

  Ort::SessionOptions session_options;

  const std::unordered_map<std::string, std::string> provider_options;
  session_options.AppendExecutionProvider(provider_name, provider_options);

  Ort::ModelCompilationOptions model_compile_options(env, session_options);
  model_compile_options.SetEpContextEmbedMode(test_config.run_config.compile_binary_embed);
  model_compile_options.SetInputModelPath(test_config.model_info.model_file_path.c_str());
  model_compile_options.SetOutputModelPath(output_ctx_model_path.c_str());

  // steady_clock is monotonic; high_resolution_clock may alias system_clock, which can be
  // adjusted while the (potentially long) compilation is running and skew the measurement.
  const auto compile_time_start = std::chrono::steady_clock::now();
  Ort::Status status = Ort::CompileModel(env, model_compile_options);
  const auto compile_time_end = std::chrono::steady_clock::now();
  const std::chrono::duration<double> compile_duration = compile_time_end - compile_time_start;

  if (!status.IsOK()) {
    std::cout << "Failed to compile model: " << status.GetErrorMessage() << std::endl;
  } else {
    std::cout << "Compile time cost: " << compile_duration.count() << " s\n";
  }
  return status;
}

onnxruntime/test/perftest/test_configuration.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ struct RunConfig {
6969
std::basic_string<ORTCHAR_T> register_custom_op_path;
7070
bool enable_cuda_io_binding{false};
7171
bool use_extensions = false;
72+
bool compile_ep_context{false};
73+
std::basic_string<ORTCHAR_T> compile_model_path;
74+
bool compile_binary_embed{false};
7275
};
7376

7477
struct PerformanceTestConfig {

0 commit comments

Comments
 (0)